From 1c81adf362ec79750850dc5ecb0bf3e60399e54f Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 12 Dec 2019 14:11:27 -0800 Subject: [PATCH] [VectorOps] Add lowering of vector.shuffle to LLVM IR For example, a shuffle %1 = vector.shuffle %arg0, %arg1 [0 : i32, 1 : i32] : vector<2xf32>, vector<2xf32> becomes a direct LLVM shuffle %0 = llvm.shufflevector %arg0, %arg1 [0 : i32, 1 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> but %1 = vector.shuffle %a, %b[1 : i32, 0 : i32, 2: i32] : vector<1x4xf32>, vector<2x4xf32> becomes the more elaborate (note the index permutation that drives argument selection for the extract operations) %0 = llvm.mlir.undef : !llvm<"[3 x <4 x float>]"> %1 = llvm.extractvalue %arg1[0] : !llvm<"[2 x <4 x float>]"> %2 = llvm.insertvalue %1, %0[0] : !llvm<"[3 x <4 x float>]"> %3 = llvm.extractvalue %arg0[0] : !llvm<"[1 x <4 x float>]"> %4 = llvm.insertvalue %3, %2[1] : !llvm<"[3 x <4 x float>]"> %5 = llvm.extractvalue %arg1[1] : !llvm<"[2 x <4 x float>]"> %6 = llvm.insertvalue %5, %4[2] : !llvm<"[3 x <4 x float>]"> PiperOrigin-RevId: 285268164 --- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 169 ++++++--- .../Conversion/VectorToLLVM/vector-to-llvm.mlir | 380 ++++++++++++--------- 2 files changed, 329 insertions(+), 220 deletions(-) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 17fb933..d4c27a6 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -61,6 +61,38 @@ static VectorType reducedVectorTypeBack(VectorType tp) { return VectorType::get(tp.getShape().take_back(), tp.getElementType()); } +// Helper that picks the proper sequence for inserting. 
+static Value *insertOne(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &lowering, Location loc, Value *val1, + Value *val2, Type llvmType, int64_t rank, int64_t pos) { + if (rank == 1) { + auto idxType = rewriter.getIndexType(); + auto constant = rewriter.create( + loc, lowering.convertType(idxType), + rewriter.getIntegerAttr(idxType, pos)); + return rewriter.create(loc, llvmType, val1, val2, + constant); + } + return rewriter.create(loc, llvmType, val1, val2, + rewriter.getI64ArrayAttr(pos)); +} + +// Helper that picks the proper sequence for extracting. +static Value *extractOne(ConversionPatternRewriter &rewriter, + LLVMTypeConverter &lowering, Location loc, Value *val, + Type llvmType, int64_t rank, int64_t pos) { + if (rank == 1) { + auto idxType = rewriter.getIndexType(); + auto constant = rewriter.create( + loc, lowering.convertType(idxType), + rewriter.getIntegerAttr(idxType, pos)); + return rewriter.create(loc, llvmType, val, + constant); + } + return rewriter.create(loc, llvmType, val, + rewriter.getI64ArrayAttr(pos)); +} + class VectorBroadcastOpConversion : public LLVMOpLowering { public: explicit VectorBroadcastOpConversion(MLIRContext *context, @@ -77,11 +109,12 @@ public: return matchFailure(); // Rewrite when the full vector type can be lowered (which // implies all 'reduced' types can be lowered too). 
+ auto adaptor = vector::BroadcastOpOperandAdaptor(operands); VectorType srcVectorType = broadcastOp.getSourceType().dyn_cast(); rewriter.replaceOp( - op, expandRanks(operands[0], // source value to be expanded - op->getLoc(), // location of original broadcast + op, expandRanks(adaptor.source(), // source value to be expanded + op->getLoc(), // location of original broadcast srcVectorType, dstVectorType, rewriter)); return matchSuccess(); } @@ -142,7 +175,8 @@ private: assert((llvmType != nullptr) && "unlowerable vector type"); if (rank == 1) { Value *undef = rewriter.create(loc, llvmType); - Value *expand = insertOne(undef, value, loc, llvmType, rank, 0, rewriter); + Value *expand = + insertOne(rewriter, lowering, loc, undef, value, llvmType, rank, 0); SmallVector zeroValues(dim, 0); return rewriter.create( loc, expand, undef, rewriter.getI32ArrayAttr(zeroValues)); @@ -152,7 +186,8 @@ private: reducedVectorTypeFront(dstVectorType), rewriter); Value *result = rewriter.create(loc, llvmType); for (int64_t d = 0; d < dim; ++d) { - result = insertOne(result, expand, loc, llvmType, rank, d, rewriter); + result = + insertOne(rewriter, lowering, loc, result, expand, llvmType, rank, d); } return result; } @@ -182,62 +217,86 @@ private: Value *result = rewriter.create(loc, llvmType); bool atStretch = dim != srcVectorType.getDimSize(0); if (rank == 1) { + assert(atStretch); Type redLlvmType = lowering.convertType(dstVectorType.getElementType()); - if (atStretch) { - Value *one = extractOne(value, loc, redLlvmType, rank, 0, rewriter); - Value *expand = - insertOne(result, one, loc, llvmType, rank, 0, rewriter); - SmallVector zeroValues(dim, 0); - return rewriter.create( - loc, expand, result, rewriter.getI32ArrayAttr(zeroValues)); - } - for (int64_t d = 0; d < dim; ++d) { - Value *one = extractOne(value, loc, redLlvmType, rank, d, rewriter); - result = insertOne(result, one, loc, llvmType, rank, d, rewriter); - } - } else { - VectorType redSrcType = 
reducedVectorTypeFront(srcVectorType); - VectorType redDstType = reducedVectorTypeFront(dstVectorType); - Type redLlvmType = lowering.convertType(redSrcType); - for (int64_t d = 0; d < dim; ++d) { - int64_t pos = atStretch ? 0 : d; - Value *one = extractOne(value, loc, redLlvmType, rank, pos, rewriter); - Value *expand = expandRanks(one, loc, redSrcType, redDstType, rewriter); - result = insertOne(result, expand, loc, llvmType, rank, d, rewriter); - } + Value *one = + extractOne(rewriter, lowering, loc, value, redLlvmType, rank, 0); + Value *expand = + insertOne(rewriter, lowering, loc, result, one, llvmType, rank, 0); + SmallVector zeroValues(dim, 0); + return rewriter.create( + loc, expand, result, rewriter.getI32ArrayAttr(zeroValues)); + } + VectorType redSrcType = reducedVectorTypeFront(srcVectorType); + VectorType redDstType = reducedVectorTypeFront(dstVectorType); + Type redLlvmType = lowering.convertType(redSrcType); + for (int64_t d = 0; d < dim; ++d) { + int64_t pos = atStretch ? 0 : d; + Value *one = + extractOne(rewriter, lowering, loc, value, redLlvmType, rank, pos); + Value *expand = expandRanks(one, loc, redSrcType, redDstType, rewriter); + result = + insertOne(rewriter, lowering, loc, result, expand, llvmType, rank, d); } return result; } +}; - // Picks the proper sequence for inserting. 
- Value *insertOne(Value *val1, Value *val2, Location loc, Type llvmType, - int64_t rank, int64_t pos, - ConversionPatternRewriter &rewriter) const { - if (rank == 1) { - auto idxType = rewriter.getIndexType(); - auto constant = rewriter.create( - loc, lowering.convertType(idxType), - rewriter.getIntegerAttr(idxType, pos)); - return rewriter.create(loc, llvmType, val1, val2, - constant); +class VectorShuffleOpConversion : public LLVMOpLowering { +public: + explicit VectorShuffleOpConversion(MLIRContext *context, + LLVMTypeConverter &typeConverter) + : LLVMOpLowering(vector::ShuffleOp::getOperationName(), context, + typeConverter) {} + + PatternMatchResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + auto adaptor = vector::ShuffleOpOperandAdaptor(operands); + auto shuffleOp = cast(op); + auto v1Type = shuffleOp.getV1VectorType(); + auto v2Type = shuffleOp.getV2VectorType(); + auto vectorType = shuffleOp.getVectorType(); + Type llvmType = lowering.convertType(vectorType); + auto maskArrayAttr = shuffleOp.mask(); + + // Bail if result type cannot be lowered. + if (!llvmType) + return matchFailure(); + + // Get rank and dimension sizes. + int64_t rank = vectorType.getRank(); + assert(v1Type.getRank() == rank); + assert(v2Type.getRank() == rank); + int64_t v1Dim = v1Type.getDimSize(0); + + // For rank 1, where both operands have *exactly* the same vector type, + // there is direct shuffle support in LLVM. Use it! + if (rank == 1 && v1Type == v2Type) { + Value *shuffle = rewriter.create( + loc, adaptor.v1(), adaptor.v2(), maskArrayAttr); + rewriter.replaceOp(op, shuffle); + return matchSuccess(); } - return rewriter.create(loc, llvmType, val1, val2, - rewriter.getI64ArrayAttr(pos)); - } - // Picks the proper sequence for extracting. 
- Value *extractOne(Value *value, Location loc, Type llvmType, int64_t rank, - int64_t pos, ConversionPatternRewriter &rewriter) const { - if (rank == 1) { - auto idxType = rewriter.getIndexType(); - auto constant = rewriter.create( - loc, lowering.convertType(idxType), - rewriter.getIntegerAttr(idxType, pos)); - return rewriter.create(loc, llvmType, value, - constant); + // For all other cases, insert the individual values individually. + Value *insert = rewriter.create(loc, llvmType); + int64_t insPos = 0; + for (auto en : llvm::enumerate(maskArrayAttr)) { + int64_t extPos = en.value().cast().getInt(); + Value *value = adaptor.v1(); + if (extPos >= v1Dim) { + extPos -= v1Dim; + value = adaptor.v2(); + } + Value *extract = + extractOne(rewriter, lowering, loc, value, llvmType, rank, extPos); + insert = insertOne(rewriter, lowering, loc, insert, extract, llvmType, + rank, insPos++); } - return rewriter.create(loc, llvmType, value, - rewriter.getI64ArrayAttr(pos)); + rewriter.replaceOp(op, insert); + return matchSuccess(); } }; @@ -506,9 +565,9 @@ public: /// Populate the given list with patterns that convert from Vector to LLVM. 
void mlir::populateVectorToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns) { - patterns.insert( + patterns.insert( converter.getDialect()->getContext(), converter); } diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 28c21f6..0c4b23f 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -4,231 +4,281 @@ func @broadcast_vec1d_from_scalar(%arg0: f32) -> vector<2xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2xf32> return %0 : vector<2xf32> } -// CHECK-LABEL: broadcast_vec1d_from_scalar -// CHECK: llvm.mlir.undef : !llvm<"<2 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}}[0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> -// CHECK: llvm.return {{.*}} : !llvm<"<2 x float>"> +// CHECK-LABEL: broadcast_vec1d_from_scalar +// CHECK: llvm.mlir.undef : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.shufflevector {{.*}}, {{.*}}[0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> +// CHECK: llvm.return {{.*}} : !llvm<"<2 x float>"> func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x3xf32> return %0 : vector<2x3xf32> } -// CHECK-LABEL: broadcast_vec2d_from_scalar -// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}}[0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.mlir.undef : !llvm<"[2 x <3 x float>]"> 
-// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[2 x <3 x float>]"> +// CHECK-LABEL: broadcast_vec2d_from_scalar +// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: llvm.shufflevector {{.*}}, {{.*}}[0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: llvm.mlir.undef : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.return {{.*}} : !llvm<"[2 x <3 x float>]"> func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x3x4xf32> return %0 : vector<2x3x4xf32> } -// CHECK-LABEL: broadcast_vec3d_from_scalar -// CHECK: llvm.mlir.undef : !llvm<"<4 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"[2 x [3 x <4 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[2 x [3 x <4 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[2 x [3 x <4 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK-LABEL: broadcast_vec3d_from_scalar +// CHECK: llvm.mlir.undef : !llvm<"<4 
x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> +// CHECK: llvm.mlir.undef : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.mlir.undef : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: llvm.return {{.*}} : !llvm<"[2 x [3 x <4 x float>]]"> func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<2xf32> return %0 : vector<2xf32> } -// CHECK-LABEL: broadcast_vec1d_from_vec1d -// CHECK: llvm.return {{.*}} : !llvm<"<2 x float>"> +// CHECK-LABEL: broadcast_vec1d_from_vec1d +// CHECK: llvm.return {{.*}} : !llvm<"<2 x float>"> func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32> return %0 : vector<3x2xf32> } -// CHECK-LABEL: broadcast_vec2d_from_vec1d -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[3 x <2 x float>]"> +// CHECK-LABEL: broadcast_vec2d_from_vec1d +// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x 
float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.return {{.*}} : !llvm<"[3 x <2 x float>]"> func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } -// CHECK-LABEL: broadcast_vec3d_from_vec1d -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK-LABEL: broadcast_vec3d_from_vec1d +// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.mlir.undef : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.return {{.*}} : !llvm<"[4 x [3 x <2 x float>]]"> func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<3x2xf32> to 
vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } -// CHECK-LABEL: broadcast_vec3d_from_vec2d -// CHECK: llvm.mlir.undef : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK-LABEL: broadcast_vec3d_from_vec2d +// CHECK: llvm.mlir.undef : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.return {{.*}} : !llvm<"[4 x [3 x <2 x float>]]"> func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> { %0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32> return %0 : vector<4xf32> } -// CHECK-LABEL: broadcast_stretch -// CHECK: llvm.mlir.undef : !llvm<"<4 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> -// CHECK: llvm.return {{.*}} : !llvm<"<4 x float>"> +// CHECK-LABEL: broadcast_stretch +// CHECK: llvm.mlir.undef : !llvm<"<4 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : 
!llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> +// CHECK: llvm.return {{.*}} : !llvm<"<4 x float>"> func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> { %0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32> return %0 : vector<3x4xf32> } -// CHECK-LABEL: broadcast_stretch_at_start -// CHECK: llvm.mlir.undef : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[3 x <4 x float>]"> +// CHECK-LABEL: broadcast_stretch_at_start +// CHECK: llvm.mlir.undef : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <4 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <4 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <4 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.return {{.*}} : !llvm<"[3 x <4 x float>]"> func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> { %0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32> return %0 : vector<4x3xf32> } -// CHECK-LABEL: broadcast_stretch_at_end -// CHECK: llvm.mlir.undef : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[4 x <1 x float>]"> -// CHECK: llvm.mlir.undef : 
!llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[1] : !llvm<"[4 x <1 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x <1 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x <1 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement 
{{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[4 x <3 x float>]"> +// CHECK-LABEL: broadcast_stretch_at_end +// CHECK: llvm.mlir.undef : !llvm<"[4 x <3 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[4 x <1 x float>]"> +// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x <3 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[1] : !llvm<"[4 x <1 x float>]"> +// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x <3 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x <1 x float>]"> +// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: 
llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x <3 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x <1 x float>]"> +// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x <3 x float>]"> +// CHECK: llvm.return {{.*}} : !llvm<"[4 x <3 x float>]"> func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } -// CHECK-LABEL: broadcast_stretch_in_middle -// CHECK: llvm.mlir.undef : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.extractvalue {{.*}}[1] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, 
{{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK-LABEL: broadcast_stretch_in_middle +// CHECK: llvm.mlir.undef : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[4 x [1 x <2 x float>]]"> +// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> +// CHECK: 
llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.extractvalue {{.*}}[1] : !llvm<"[4 x [1 x <2 x float>]]"> +// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x [1 x <2 x float>]]"> +// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x [1 x <2 x float>]]"> +// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: 
llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.return {{.*}} : !llvm<"[4 x [3 x <2 x float>]]"> func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32> { %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32> return %2 : vector<2x3xf32> } -// CHECK-LABEL: outerproduct -// CHECK: llvm.mlir.undef : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> -// CHECK: llvm.fmul {{.*}}, {{.*}} : !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}[0] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.shufflevector {{.*}} [1 : i32, 1 : i32, 1 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> -// CHECK: llvm.fmul {{.*}}, {{.*}} : !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}[1] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[2 x <3 x float>]"> +// CHECK-LABEL: outerproduct +// CHECK: llvm.mlir.undef : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> +// CHECK: llvm.fmul {{.*}}, {{.*}} : !llvm<"<3 x float>"> +// CHECK: llvm.insertvalue {{.*}}[0] : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.shufflevector {{.*}} [1 : i32, 1 : i32, 1 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> +// CHECK: llvm.fmul {{.*}}, {{.*}} : !llvm<"<3 x float>"> +// CHECK: llvm.insertvalue {{.*}}[1] : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.return {{.*}} : !llvm<"[2 x <3 x float>]"> func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: 
vector<2x3xf32>) -> vector<2x3xf32> { %2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<3xf32> return %2 : vector<2x3xf32> } -// CHECK-LABEL: outerproduct_add -// CHECK: llvm.mlir.undef : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[2 x <3 x float>]"> -// CHECK: "llvm.intr.fmuladd"({{.*}}) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">) -> !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}[0] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.shufflevector {{.*}} [1 : i32, 1 : i32, 1 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> -// CHECK: llvm.extractvalue {{.*}}[1] : !llvm<"[2 x <3 x float>]"> -// CHECK: "llvm.intr.fmuladd"({{.*}}) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">) -> !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}[1] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[2 x <3 x float>]"> +// CHECK-LABEL: outerproduct_add +// CHECK: llvm.mlir.undef : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[2 x <3 x float>]"> +// CHECK: "llvm.intr.fmuladd"({{.*}}) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">) -> !llvm<"<3 x float>"> +// CHECK: llvm.insertvalue {{.*}}[0] : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.shufflevector {{.*}} [1 : i32, 1 : i32, 1 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> +// CHECK: llvm.extractvalue {{.*}}[1] : !llvm<"[2 x <3 x float>]"> +// CHECK: "llvm.intr.fmuladd"({{.*}}) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">) -> !llvm<"<3 x float>"> +// CHECK: llvm.insertvalue {{.*}}[1] : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.return {{.*}} : !llvm<"[2 x <3 x float>]"> + +func @shuffle_1D_direct(%arg0: vector<2xf32>, 
%arg1: vector<2xf32>) -> vector<2xf32> { + %1 = vector.shuffle %arg0, %arg1 [0 : i32, 1 : i32] : vector<2xf32>, vector<2xf32> + return %1 : vector<2xf32> +} +// CHECK-LABEL: shuffle_1D_direct(%arg0: !llvm<"<2 x float>">, %arg1: !llvm<"<2 x float>">) +// CHECK: %[[s:.*]] = llvm.shufflevector %arg0, %arg1 [0 : i32, 1 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> +// CHECK: llvm.return %[[s]] : !llvm<"<2 x float>"> + +func @shuffle_1D(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf32> { + %1 = vector.shuffle %arg0, %arg1 [4 : i32, 3 : i32, 2 : i32, 1 : i32, 0 : i32] : vector<2xf32>, vector<3xf32> + return %1 : vector<5xf32> +} +// CHECK-LABEL: shuffle_1D(%arg0: !llvm<"<2 x float>">, %arg1: !llvm<"<3 x float>">) +// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm<"<5 x float>"> +// CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 +// CHECK: %[[e1:.*]] = llvm.extractelement %arg1[%[[c2]] : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: %[[i1:.*]] = llvm.insertelement %[[e1]], %[[u0]][%[[c0]] : !llvm.i64] : !llvm<"<5 x float>"> +// CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK: %[[e2:.*]] = llvm.extractelement %arg1[%[[c1]] : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK: %[[i2:.*]] = llvm.insertelement %[[e2]], %[[i1]][%[[c1]] : !llvm.i64] : !llvm<"<5 x float>"> +// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: %[[e3:.*]] = llvm.extractelement %arg1[%[[c0]] : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 +// CHECK: %[[i3:.*]] = llvm.insertelement %[[e3]], %[[i2]][%[[c2]] : !llvm.i64] : !llvm<"<5 x float>"> +// CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK: %[[e4:.*]] = llvm.extractelement %arg0[%[[c1]] : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: %[[c3:.*]] = llvm.mlir.constant(3 : 
index) : !llvm.i64 +// CHECK: %[[i4:.*]] = llvm.insertelement %[[e4]], %[[i3]][%[[c3]] : !llvm.i64] : !llvm<"<5 x float>"> +// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: %[[e5:.*]] = llvm.extractelement %arg0[%[[c0]] : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 +// CHECK: %[[i5:.*]] = llvm.insertelement %[[e5]], %[[i4]][%[[c4]] : !llvm.i64] : !llvm<"<5 x float>"> +// CHECK: llvm.return %[[i5]] : !llvm<"<5 x float>"> + +func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf32> { + %1 = vector.shuffle %a, %b[1 : i32, 0 : i32, 2: i32] : vector<1x4xf32>, vector<2x4xf32> + return %1 : vector<3x4xf32> +} +// CHECK-LABEL: shuffle_2D(%arg0: !llvm<"[1 x <4 x float>]">, %arg1: !llvm<"[2 x <4 x float>]">) +// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm<"[3 x <4 x float>]"> +// CHECK: %[[e1:.*]] = llvm.extractvalue %arg1[0] : !llvm<"[2 x <4 x float>]"> +// CHECK: %[[i1:.*]] = llvm.insertvalue %[[e1]], %[[u0]][0] : !llvm<"[3 x <4 x float>]"> +// CHECK: %[[e2:.*]] = llvm.extractvalue %arg0[0] : !llvm<"[1 x <4 x float>]"> +// CHECK: %[[i2:.*]] = llvm.insertvalue %[[e2]], %[[i1]][1] : !llvm<"[3 x <4 x float>]"> +// CHECK: %[[e3:.*]] = llvm.extractvalue %arg1[1] : !llvm<"[2 x <4 x float>]"> +// CHECK: %[[i3:.*]] = llvm.insertvalue %[[e3]], %[[i2]][2] : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.return %[[i3]] : !llvm<"[3 x <4 x float>]"> func @extract_element_from_vec_1d(%arg0: vector<16xf32>) -> f32 { %0 = vector.extract %arg0[15 : i32]: vector<16xf32> -- 2.7.4