From e0b7bee7cf8ece5ec41c8343b61e1daf651e4f3a Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 22 Nov 2021 19:23:18 +0000 Subject: [PATCH] Revert "[mlir][Vector] Add a vblendps-based impl for transpose8x8 (both intrin and inline_asm)" This reverts commit a9e236bed835c58be381dadb973a1db0681e4795. This broke the Windows build: mlir\include\mlir/Dialect/X86Vector/Transforms.h(28): error C2061: syntax error: identifier 'uint' --- mlir/include/mlir/Dialect/X86Vector/Transforms.h | 63 +++------------- .../Dialect/X86Vector/Transforms/AVXTranspose.cpp | 86 +++++----------------- .../Dialect/Vector/vector-transpose-lowering.mlir | 27 ++++--- .../LLVMIR/CPU/X86/test-inline-asm-vector.mlir | 56 -------------- mlir/test/lib/Dialect/Vector/CMakeLists.txt | 2 - .../lib/Dialect/Vector/TestVectorTransforms.cpp | 5 -- .../llvm-project-overlay/mlir/test/BUILD.bazel | 2 - 7 files changed, 45 insertions(+), 196 deletions(-) delete mode 100644 mlir/test/Integration/Dialect/LLVMIR/CPU/X86/test-inline-asm-vector.mlir diff --git a/mlir/include/mlir/Dialect/X86Vector/Transforms.h b/mlir/include/mlir/Dialect/X86Vector/Transforms.h index 187ee79..66749d55 100644 --- a/mlir/include/mlir/Dialect/X86Vector/Transforms.h +++ b/mlir/include/mlir/Dialect/X86Vector/Transforms.h @@ -23,43 +23,19 @@ namespace x86vector { /// Helper class to factor out the creation and extraction of masks from nibs. struct MaskHelper { - /// b0 captures the lowest bit, b7 captures the highest bit. - /// Meant to be used with instructions such as mm256BlendPs. - template - static uint8_t blend() { - static_assert(b0 <= 1 && b1 <= 1 && b2 <= 1 && b3 <= 1, "overflow"); - static_assert(b4 <= 1 && b5 <= 1 && b6 <= 1 && b7 <= 1, "overflow"); - return static_cast((b7 << 7) | (b6 << 6) | (b5 << 5) | (b4 << 4) | - (b3 << 3) | (b2 << 2) | (b1 << 1) | b0); - } - /// b0 captures the lowest bit, b7 captures the highest bit. - /// Meant to be used with instructions such as mm256BlendPs. 
- static void extractBlend(uint8_t mask, uint8_t &b0, uint8_t &b1, uint8_t &b2, - uint8_t &b3, uint8_t &b4, uint8_t &b5, uint8_t &b6, - uint8_t &b7) { - b7 = mask & (1 << 7); - b6 = mask & (1 << 6); - b5 = mask & (1 << 5); - b4 = mask & (1 << 4); - b3 = mask & (1 << 3); - b2 = mask & (1 << 2); - b1 = mask & (1 << 1); - b0 = mask & 1; - } /// b01 captures the lower 2 bits, b67 captures the higher 2 bits. /// Meant to be used with instructions such as mm256ShufflePs. template - static uint8_t shuffle() { + static int8_t shuffle() { static_assert(b01 <= 0x03, "overflow"); static_assert(b23 <= 0x03, "overflow"); static_assert(b45 <= 0x03, "overflow"); static_assert(b67 <= 0x03, "overflow"); - return static_cast((b67 << 6) | (b45 << 4) | (b23 << 2) | b01); + return static_cast((b67 << 6) | (b45 << 4) | (b23 << 2) | b01); } /// b01 captures the lower 2 bits, b67 captures the higher 2 bits. - static void extractShuffle(uint8_t mask, uint8_t &b01, uint8_t &b23, - uint8_t &b45, uint8_t &b67) { + static void extractShuffle(int8_t mask, int8_t &b01, int8_t &b23, int8_t &b45, + int8_t &b67) { b67 = (mask & (0x03 << 6)) >> 6; b45 = (mask & (0x03 << 4)) >> 4; b23 = (mask & (0x03 << 2)) >> 2; @@ -68,13 +44,13 @@ struct MaskHelper { /// b03 captures the lower 4 bits, b47 captures the higher 4 bits. /// Meant to be used with instructions such as mm256Permute2f128Ps. template - static uint8_t permute() { + static int8_t permute() { static_assert(b03 <= 0x0f, "overflow"); static_assert(b47 <= 0x0f, "overflow"); - return static_cast((b47 << 4) + b03); + return static_cast((b47 << 4) + b03); } /// b03 captures the lower 4 bits, b47 captures the higher 4 bits. 
- static void extractPermute(uint8_t mask, uint8_t &b03, uint8_t &b47) { + static void extractPermute(int8_t mask, int8_t &b03, int8_t &b47) { b47 = (mask & (0x0f << 4)) >> 4; b03 = mask & 0x0f; } @@ -94,20 +70,6 @@ struct MaskHelper { namespace avx2 { -namespace inline_asm { -//===----------------------------------------------------------------------===// -/// Methods in the inline_asm namespace emit calls to LLVM::InlineAsmOp. -//===----------------------------------------------------------------------===// -/// If bit i of `mask` is zero, take f32@i from v1 else take it from v2. -Value mm256BlendPsAsm(ImplicitLocOpBuilder &b, Value v1, Value v2, - uint8_t mask); - -} // namespace inline_asm - -namespace intrin { -//===----------------------------------------------------------------------===// -/// Methods in the intrin namespace emulate clang's impl. of X86 intrinsics. -//===----------------------------------------------------------------------===// /// Lower to vector.shuffle v1, v2, [0, 8, 1, 9, 4, 12, 5, 13]. Value mm256UnpackLoPs(ImplicitLocOpBuilder &b, Value v1, Value v2); @@ -118,7 +80,7 @@ Value mm256UnpackHiPs(ImplicitLocOpBuilder &b, Value v1, Value v2); /// Take an 8 bit mask, 2 bit for each position of a[0, 3) **and** b[0, 4): /// 0:127 | 128:255 /// b01 b23 C8 D8 | b01+4 b23+4 C8+4 D8+4 -Value mm256ShufflePs(ImplicitLocOpBuilder &b, Value v1, Value v2, uint8_t mask); +Value mm256ShufflePs(ImplicitLocOpBuilder &b, Value v1, Value v2, int8_t mask); // imm[0:1] out of imm[0:3] is: // 0 1 2 3 @@ -127,15 +89,8 @@ Value mm256ShufflePs(ImplicitLocOpBuilder &b, Value v1, Value v2, uint8_t mask); // 0 1 2 3 // imm[0:1] out of imm[4:7]. Value mm256Permute2f128Ps(ImplicitLocOpBuilder &b, Value v1, Value v2, - uint8_t mask); - -/// If bit i of `mask` is zero, take f32@i from v1 else take it from v2. 
-Value mm256BlendPs(ImplicitLocOpBuilder &b, Value v1, Value v2, uint8_t mask); -} // namespace intrin + int8_t mask); -//===----------------------------------------------------------------------===// -/// Generic lowerings may either use intrin or inline_asm depending on needs. -//===----------------------------------------------------------------------===// /// 4x8xf32-specific AVX2 transpose lowering. void transpose4x8xf32(ImplicitLocOpBuilder &ib, MutableArrayRef vs); diff --git a/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp b/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp index 38088e1..981413b 100644 --- a/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp +++ b/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp @@ -11,43 +11,25 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/Dialect/X86Vector/Transforms.h" #include "mlir/IR/ImplicitLocOpBuilder.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/FormatVariadic.h" using namespace mlir; using namespace mlir::vector; using namespace mlir::x86vector; using namespace mlir::x86vector::avx2; -using namespace mlir::x86vector::avx2::inline_asm; -using namespace mlir::x86vector::avx2::intrin; - -Value mlir::x86vector::avx2::inline_asm::mm256BlendPsAsm( - ImplicitLocOpBuilder &b, Value v1, Value v2, uint8_t mask) { - auto asmDialectAttr = - LLVM::AsmDialectAttr::get(b.getContext(), LLVM::AsmDialect::AD_Intel); - auto asmTp = "vblendps $0, $1, $2, {0}"; - auto asmCstr = "=x,x,x"; // Careful: constraint parser is very brittle: no ws! 
- SmallVector asmVals{v1, v2}; - auto asmStr = llvm::formatv(asmTp, llvm::format_hex(mask, /*width=*/2)).str(); - auto asmOp = b.create( - v1.getType(), asmVals, asmStr, asmCstr, false, false, asmDialectAttr); - return asmOp.getResult(0); -} -Value mlir::x86vector::avx2::intrin::mm256UnpackLoPs(ImplicitLocOpBuilder &b, - Value v1, Value v2) { +Value mlir::x86vector::avx2::mm256UnpackLoPs(ImplicitLocOpBuilder &b, Value v1, + Value v2) { return b.create( v1, v2, ArrayRef{0, 8, 1, 9, 4, 12, 5, 13}); } -Value mlir::x86vector::avx2::intrin::mm256UnpackHiPs(ImplicitLocOpBuilder &b, - Value v1, Value v2) { +Value mlir::x86vector::avx2::mm256UnpackHiPs(ImplicitLocOpBuilder &b, Value v1, + Value v2) { return b.create( v1, v2, ArrayRef{2, 10, 3, 11, 6, 14, 7, 15}); } @@ -55,10 +37,9 @@ Value mlir::x86vector::avx2::intrin::mm256UnpackHiPs(ImplicitLocOpBuilder &b, /// Takes an 8 bit mask, 2 bit for each position of a[0, 3) **and** b[0, 4): /// 0:127 | 128:255 /// b01 b23 C8 D8 | b01+4 b23+4 C8+4 D8+4 -Value mlir::x86vector::avx2::intrin::mm256ShufflePs(ImplicitLocOpBuilder &b, - Value v1, Value v2, - uint8_t mask) { - uint8_t b01, b23, b45, b67; +Value mlir::x86vector::avx2::mm256ShufflePs(ImplicitLocOpBuilder &b, Value v1, + Value v2, int8_t mask) { + int8_t b01, b23, b45, b67; MaskHelper::extractShuffle(mask, b01, b23, b45, b67); SmallVector shuffleMask{b01, b23, b45 + 8, b67 + 8, b01 + 4, b23 + 4, b45 + 8 + 4, b67 + 8 + 4}; @@ -71,10 +52,11 @@ Value mlir::x86vector::avx2::intrin::mm256ShufflePs(ImplicitLocOpBuilder &b, // a[0:127] or a[128:255] or b[0:127] or b[128:255] // 0 1 2 3 // imm[0:1] out of imm[4:7]. 
-Value mlir::x86vector::avx2::intrin::mm256Permute2f128Ps( - ImplicitLocOpBuilder &b, Value v1, Value v2, uint8_t mask) { +Value mlir::x86vector::avx2::mm256Permute2f128Ps(ImplicitLocOpBuilder &b, + Value v1, Value v2, + int8_t mask) { SmallVector shuffleMask; - auto appendToMask = [&](uint8_t control) { + auto appendToMask = [&](int8_t control) { if (control == 0) llvm::append_range(shuffleMask, ArrayRef{0, 1, 2, 3}); else if (control == 1) @@ -86,25 +68,13 @@ Value mlir::x86vector::avx2::intrin::mm256Permute2f128Ps( else llvm_unreachable("control > 3 : overflow"); }; - uint8_t b03, b47; + int8_t b03, b47; MaskHelper::extractPermute(mask, b03, b47); appendToMask(b03); appendToMask(b47); return b.create(v1, v2, shuffleMask); } -/// If bit i of `mask` is zero, take f32@i from v1 else take it from v2. -Value mlir::x86vector::avx2::intrin::mm256BlendPs(ImplicitLocOpBuilder &b, - Value v1, Value v2, - uint8_t mask) { - SmallVector shuffleMask; - for (int i = 0; i < 8; ++i) { - bool isSet = mask & (1 << i); - shuffleMask.push_back(!isSet ? i : i + 8); - } - return b.create(v1, v2, shuffleMask); -} - /// AVX2 4x8xf32-specific transpose lowering using a "C intrinsics" model. 
void mlir::x86vector::avx2::transpose4x8xf32(ImplicitLocOpBuilder &ib, MutableArrayRef vs) { @@ -148,30 +118,14 @@ void mlir::x86vector::avx2::transpose8x8xf32(ImplicitLocOpBuilder &ib, Value T5 = mm256UnpackHiPs(ib, vs[4], vs[5]); Value T6 = mm256UnpackLoPs(ib, vs[6], vs[7]); Value T7 = mm256UnpackHiPs(ib, vs[6], vs[7]); - - using inline_asm::mm256BlendPsAsm; - Value sh0 = mm256ShufflePs(ib, T0, T2, MaskHelper::shuffle<1, 0, 3, 2>()); - Value sh2 = mm256ShufflePs(ib, T1, T3, MaskHelper::shuffle<1, 0, 3, 2>()); - Value sh4 = mm256ShufflePs(ib, T4, T6, MaskHelper::shuffle<1, 0, 3, 2>()); - Value sh6 = mm256ShufflePs(ib, T5, T7, MaskHelper::shuffle<1, 0, 3, 2>()); - - Value S0 = - mm256BlendPsAsm(ib, T0, sh0, MaskHelper::blend<0, 0, 1, 1, 0, 0, 1, 1>()); - Value S1 = - mm256BlendPsAsm(ib, T2, sh0, MaskHelper::blend<1, 1, 0, 0, 1, 1, 0, 0>()); - Value S2 = - mm256BlendPsAsm(ib, T1, sh2, MaskHelper::blend<0, 0, 1, 1, 0, 0, 1, 1>()); - Value S3 = - mm256BlendPsAsm(ib, T3, sh2, MaskHelper::blend<1, 1, 0, 0, 1, 1, 0, 0>()); - Value S4 = - mm256BlendPsAsm(ib, T4, sh4, MaskHelper::blend<0, 0, 1, 1, 0, 0, 1, 1>()); - Value S5 = - mm256BlendPsAsm(ib, T6, sh4, MaskHelper::blend<1, 1, 0, 0, 1, 1, 0, 0>()); - Value S6 = - mm256BlendPsAsm(ib, T5, sh6, MaskHelper::blend<0, 0, 1, 1, 0, 0, 1, 1>()); - Value S7 = - mm256BlendPsAsm(ib, T7, sh6, MaskHelper::blend<1, 1, 0, 0, 1, 1, 0, 0>()); - + Value S0 = mm256ShufflePs(ib, T0, T2, MaskHelper::shuffle<1, 0, 1, 0>()); + Value S1 = mm256ShufflePs(ib, T0, T2, MaskHelper::shuffle<3, 2, 3, 2>()); + Value S2 = mm256ShufflePs(ib, T1, T3, MaskHelper::shuffle<1, 0, 1, 0>()); + Value S3 = mm256ShufflePs(ib, T1, T3, MaskHelper::shuffle<3, 2, 3, 2>()); + Value S4 = mm256ShufflePs(ib, T4, T6, MaskHelper::shuffle<1, 0, 1, 0>()); + Value S5 = mm256ShufflePs(ib, T4, T6, MaskHelper::shuffle<3, 2, 3, 2>()); + Value S6 = mm256ShufflePs(ib, T5, T7, MaskHelper::shuffle<1, 0, 1, 0>()); + Value S7 = mm256ShufflePs(ib, T5, T7, MaskHelper::shuffle<3, 2, 3, 
2>()); vs[0] = mm256Permute2f128Ps(ib, S0, S4, MaskHelper::permute<2, 0>()); vs[1] = mm256Permute2f128Ps(ib, S1, S5, MaskHelper::permute<2, 0>()); vs[2] = mm256Permute2f128Ps(ib, S2, S6, MaskHelper::permute<2, 0>()); diff --git a/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir b/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir index cc62eaa..2bb66d8 100644 --- a/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir @@ -80,17 +80,22 @@ func @transpose8x8xf32(%arg0: vector<8x8xf32>) -> vector<8x8xf32> { // AVX2-NEXT: vector.shuffle {{.*}} [2, 10, 3, 11, 6, 14, 7, 15] : vector<8xf32>, vector<8xf32> // AVX2-NEXT: vector.shuffle {{.*}} [0, 8, 1, 9, 4, 12, 5, 13] : vector<8xf32>, vector<8xf32> // AVX2-NEXT: vector.shuffle {{.*}} [2, 10, 3, 11, 6, 14, 7, 15] : vector<8xf32>, vector<8xf32> - // AVX2-COUNT-4: vector.shuffle {{.*}} [2, 3, 8, 9, 6, 7, 12, 13] : vector<8xf32>, vector<8xf32> - // AVX2-NEXT: llvm.inline_asm asm_dialect = intel "vblendps $0, $1, $2, 0xcc", "=x,x,x" {{.*}} : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - // AVX2-NEXT: llvm.inline_asm asm_dialect = intel "vblendps $0, $1, $2, 0x33", "=x,x,x" {{.*}} : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - // AVX2-NEXT: llvm.inline_asm asm_dialect = intel "vblendps $0, $1, $2, 0xcc", "=x,x,x" {{.*}} : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - // AVX2-NEXT: llvm.inline_asm asm_dialect = intel "vblendps $0, $1, $2, 0x33", "=x,x,x" {{.*}} : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - // AVX2-NEXT: llvm.inline_asm asm_dialect = intel "vblendps $0, $1, $2, 0xcc", "=x,x,x" {{.*}} : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - // AVX2-NEXT: llvm.inline_asm asm_dialect = intel "vblendps $0, $1, $2, 0x33", "=x,x,x" {{.*}} : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - // AVX2-NEXT: llvm.inline_asm asm_dialect = intel "vblendps $0, $1, $2, 0xcc", "=x,x,x" {{.*}} : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> 
- // AVX2-NEXT: llvm.inline_asm asm_dialect = intel "vblendps $0, $1, $2, 0x33", "=x,x,x" {{.*}} : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - // AVX2-COUNT-4: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32> - // AVX2-COUNT-4: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 8, 9, 4, 5, 12, 13] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [2, 3, 10, 11, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [0, 1, 2, 3, 8, 9, 10, 11] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32> + // AVX2-NEXT: vector.shuffle {{.*}} [4, 5, 6, 7, 12, 13, 14, 15] : vector<8xf32>, vector<8xf32> %0 = vector.transpose %arg0, [1, 0] : vector<8x8xf32> to vector<8x8xf32> return %0 : vector<8x8xf32> 
} diff --git a/mlir/test/Integration/Dialect/LLVMIR/CPU/X86/test-inline-asm-vector.mlir b/mlir/test/Integration/Dialect/LLVMIR/CPU/X86/test-inline-asm-vector.mlir deleted file mode 100644 index 5d5bbdc..0000000 --- a/mlir/test/Integration/Dialect/LLVMIR/CPU/X86/test-inline-asm-vector.mlir +++ /dev/null @@ -1,56 +0,0 @@ -// RUN: mlir-opt %s -convert-vector-to-llvm | \ -// RUN: mlir-cpu-runner -e entry_point_with_all_constants -entry-point-result=void \ -// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext - -module { - llvm.func @function_to_run(%a: vector<8xf32>, %b: vector<8xf32>) { - // CHECK: ( 8, 10, 12, 14, 16, 18, 20, 22 ) - %r0 = llvm.inline_asm asm_dialect = intel - "vaddps $0, $1, $2", "=x,x,x" %a, %b: - (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - vector.print %r0: vector<8xf32> - - // vblendps implemented with inline_asm. - // CHECK: ( 0, 1, 10, 11, 4, 5, 14, 15 ) - %r1 = llvm.inline_asm asm_dialect = intel - "vblendps $0, $1, $2, 0xCC", "=x,x,x" %a, %b: - (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - vector.print %r1: vector<8xf32> - - // vblendps 0xCC via vector.shuffle (emulates clang intrinsics impl) - // CHECK: ( 0, 1, 10, 11, 4, 5, 14, 15 ) - %r2 = vector.shuffle %a, %b[0, 1, 10, 11, 4, 5, 14, 15] - : vector<8xf32>, vector<8xf32> - vector.print %r2: vector<8xf32> - - // vblendps 0x33 implemented with inline_asm. - // CHECK: ( 8, 9, 2, 3, 12, 13, 6, 7 ) - %r3 = llvm.inline_asm asm_dialect = intel - "vblendps $0, $1, $2, 0x33", "=x,x,x" %a, %b: - (vector<8xf32>, vector<8xf32>) -> vector<8xf32> - vector.print %r3: vector<8xf32> - - // vblendps 0x33 via vector.shuffle (emulates clang intrinsics impl) - // CHECK: ( 8, 9, 2, 3, 12, 13, 6, 7 ) - %r4 = vector.shuffle %a, %b[8, 9, 2, 3, 12, 13, 6, 7] - : vector<8xf32>, vector<8xf32> - vector.print %r4: vector<8xf32> - - llvm.return - } - - // Solely exists to prevent inlining and get the expected assembly. 
- llvm.func @entry_point(%a: vector<8xf32>, %b: vector<8xf32>) { - llvm.call @function_to_run(%a, %b) : (vector<8xf32>, vector<8xf32>) -> () - llvm.return - } - - llvm.func @entry_point_with_all_constants() { - %a = llvm.mlir.constant(dense<[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]> - : vector<8xf32>) : vector<8xf32> - %b = llvm.mlir.constant(dense<[8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]> - : vector<8xf32>) : vector<8xf32> - llvm.call @function_to_run(%a, %b) : (vector<8xf32>, vector<8xf32>) -> () - llvm.return - } -} diff --git a/mlir/test/lib/Dialect/Vector/CMakeLists.txt b/mlir/test/lib/Dialect/Vector/CMakeLists.txt index 7629595..b3c0807 100644 --- a/mlir/test/lib/Dialect/Vector/CMakeLists.txt +++ b/mlir/test/lib/Dialect/Vector/CMakeLists.txt @@ -9,7 +9,6 @@ add_mlir_library(MLIRVectorTestPasses MLIRAnalysis MLIRLinalg MLIRLinalgTransforms - MLIRLLVMIR MLIRMemRef MLIRPass MLIRSCF @@ -17,5 +16,4 @@ add_mlir_library(MLIRVectorTestPasses MLIRTransformUtils MLIRVector MLIRVectorToSCF - MLIRX86Vector ) diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index 0acab9c..ccd6bc5 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -10,7 +10,6 @@ #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" @@ -201,10 +200,6 @@ struct TestVectorTransposeLowering llvm::cl::desc("Lower vector.transpose to avx2-specific patterns"), llvm::cl::init(false)}; - void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); - } - void runOnFunction() override { RewritePatternSet patterns(&getContext()); diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index b0ed24a..a4556d9 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -483,7 +483,6 @@ cc_library( deps = [ "//mlir:Affine", "//mlir:Analysis", - "//mlir:LLVMDialect", "//mlir:LinalgOps", "//mlir:LinalgTransforms", "//mlir:MemRefDialect", @@ -493,7 +492,6 @@ cc_library( "//mlir:TransformUtils", "//mlir:VectorOps", "//mlir:VectorToSCF", - "//mlir:X86Vector", ], ) -- 2.7.4