This reverts commit 5fa893c.
The Windows build bot fails due to a missing header.
https://reviews.llvm.org/D96326
#endif // _WIN32
#include <assert.h>
-#include <cmath>
#include <iostream>
#include "mlir/ExecutionEngine/CRunnerUtils.h"
template <typename T, int M, int... Dims>
std::ostream &operator<<(std::ostream &os, const Vector<T, M, Dims...> &v);
-template <int... Dims>
-struct StaticSizeMult {
+template <int... Dims> struct StaticSizeMult {
static constexpr int value = 1;
};
-template <int N, int... Dims>
-struct StaticSizeMult<N, Dims...> {
+template <int N, int... Dims> struct StaticSizeMult<N, Dims...> {
static constexpr int value = N * StaticSizeMult<Dims...>::value;
};
}
}
-template <typename T, int M, int... Dims>
-struct VectorDataPrinter {
+template <typename T, int M, int... Dims> struct VectorDataPrinter {
static void print(std::ostream &os, const Vector<T, M, Dims...> &val);
};
std::cout << "Unranked Memref ";
printMemRef(DynamicMemRefType<T>(M));
}
-
-/// Verify the results of two computations are equivalent up to a small
-/// numerical error.
-template <typename T>
-struct MemRefDataVerifier {
- /// Maximum number of errors printed by the verifier.
- static constexpr int errorLimit = 10;
-
- /// Verify the relative difference of the values is smaller than epsilon.
- static bool verifyRelErrorSmallerThan(T actual, T expected, T epsilon);
-
- /// Verify the values are equivalent (integers) or are close (floating-point).
- static bool verifyElem(T actual, T expected);
-
- /// Verify the data element-by-element.
- static void verify(std::ostream &os, T *actualBasePtr, T *expectedBasePtr,
- int64_t dim, int64_t offset, const int64_t *sizes,
- const int64_t *strides, int64_t &errors);
-};
-
-template <typename T>
-bool MemRefDataVerifier<T>::verifyRelErrorSmallerThan(T actual, T expected,
- T epsilon) {
- // Return an error if one of the values is infinite or NaN.
- if (!std::isfinite(actual) || !std::isfinite(expected))
- return false;
- // Return true if the relative error is smaller than epsilon.
- T delta = std::abs(actual - expected);
- T maximum = std::max(std::abs(actual), std::abs(expected));
- if (delta > epsilon * maximum)
- return false;
- return true;
-}
-
-template <typename T>
-bool MemRefDataVerifier<T>::verifyElem(T actual, T expected) {
- return actual == expected;
-}
-
-template <>
-inline bool MemRefDataVerifier<double>::verifyElem(double actual,
- double expected) {
- return verifyRelErrorSmallerThan(actual, expected, 1e-12);
-}
-
-template <>
-inline bool MemRefDataVerifier<float>::verifyElem(float actual,
- float expected) {
- return verifyRelErrorSmallerThan(actual, expected, 1e-6);
-}
-
-template <typename T>
-void MemRefDataVerifier<T>::verify(std::ostream &os, T *actualBasePtr,
- T *expectedBasePtr, int64_t dim,
- int64_t offset, const int64_t *sizes,
- const int64_t *strides, int64_t &errors) {
- // Verify the elements at the current offset.
- if (dim == 0) {
- if (!verifyElem(actualBasePtr[offset], expectedBasePtr[offset])) {
- if (errors < errorLimit) {
- os << actualBasePtr[offset] << " != " << expectedBasePtr[offset]
- << " offset = " << offset << "\n";
- } else if (errors == errorLimit) {
- os << "...\n";
- }
- errors++;
- }
- return;
- }
- // Iterate the current dimension and verify recursively.
- for (int64_t i = 0; i < sizes[0]; ++i) {
- verify(os, actualBasePtr, expectedBasePtr, dim - 1, offset + i * strides[0],
- sizes + 1, strides + 1, errors);
- }
-}
-
-/// Verify the equivalence of two dynamic memrefs.
-template <typename T>
-int64_t verifyMemRef(const DynamicMemRefType<T> &actual,
- const DynamicMemRefType<T> &expected) {
- // Check the shapes of the MemRefs match.
- for (int64_t i = 0; i < actual.rank; ++i) {
- if (expected.rank != actual.rank || actual.offset != expected.offset ||
- actual.sizes[i] != expected.sizes[i] ||
- actual.strides[i] != expected.strides[i]) {
- printMemRefMetaData(std::cerr, actual);
- printMemRefMetaData(std::cerr, expected);
- return -1;
- }
- }
- // Count the errors and print the verification result.
- int64_t errors = 0;
- MemRefDataVerifier<T>::verify(std::cerr, actual.basePtr, expected.basePtr,
- actual.rank, actual.offset, actual.sizes,
- actual.strides, errors);
- return errors;
-}
-
-/// Verify the equivalence of two unranked memrefs.
-template <typename T>
-int64_t verifyMemRef(UnrankedMemRefType<T> &actual,
- UnrankedMemRefType<T> &expected) {
- return verifyMemRef(DynamicMemRefType<T>(actual),
- DynamicMemRefType<T>(expected));
-}
-
} // namespace impl
////////////////////////////////////////////////////////////////////////////////
_mlir_ciface_print_memref_vector_4x4xf32(
StridedMemRefType<Vector2D<4, 4, float>, 2> *M);
-extern "C" MLIR_RUNNERUTILS_EXPORT int64_t _mlir_ciface_verifyMemRefI32(
- UnrankedMemRefType<int32_t> *actual, UnrankedMemRefType<int32_t> *expected);
-extern "C" MLIR_RUNNERUTILS_EXPORT int64_t _mlir_ciface_verifyMemRefF32(
- UnrankedMemRefType<float> *actual, UnrankedMemRefType<float> *expected);
-extern "C" MLIR_RUNNERUTILS_EXPORT int64_t _mlir_ciface_verifyMemRefF64(
- UnrankedMemRefType<double> *actual, UnrankedMemRefType<double> *expected);
-
-extern "C" MLIR_RUNNERUTILS_EXPORT int64_t verifyMemRefI32(int64_t rank,
- void *actualPtr,
- void *expectedPtr);
-extern "C" MLIR_RUNNERUTILS_EXPORT int64_t verifyMemRefF32(int64_t rank,
- void *actualPtr,
- void *expectedPtr);
-extern "C" MLIR_RUNNERUTILS_EXPORT int64_t verifyMemRefF64(int64_t rank,
- void *actualPtr,
- void *expectedPtr);
-
#endif // EXECUTIONENGINE_RUNNERUTILS_H_
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
+// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,32 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
-// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
%tmatmul = subf %t_end_matmul, %t_start_matmul: f64
call @print_perf(%iters, %tmatmul) : (index, f64) -> ()
- // CHECK: {{^0}}
- %C_ref = alloc() : !row_major_C
- linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c
- linalg.matmul ins(%A, %B : !row_major_A, !row_major_B)
- outs(%C_ref: !row_major_C)
- %act = memref_cast %C : !row_major_C to memref<*xf32>
- %exp = memref_cast %C_ref : !row_major_C to memref<*xf32>
- %errors = call @verifyMemRefF32(%act, %exp) : (memref<*xf32>, memref<*xf32>) -> i64
- vector.print %errors : i64
- dealloc %C_ref : !row_major_C
-
+ %res = load %C[%c0, %c0]: !row_major_C
+ // CHECK: 64
+ vector.print %res: f32
+
dealloc %A : !row_major_A
dealloc %B : !row_major_B
dealloc %C : !row_major_C
}
func private @rtclock() -> f64
-func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
+// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,16 vectorize" | \
// TODO: linalg.copy vectorization in the presence of permutation map fails. Enable when addressed.
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
-// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
%tmatmul_column_major = subf %t_end_matmul_column_major, %t_start_matmul_column_major: f64
call @print_perf(%iters, %tmatmul_column_major) : (index, f64) -> ()
- // CHECK: {{^0}}
- %cC_ref = alloc() : !column_major_C
- linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c
- linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B)
- outs(%cC_ref: !column_major_C)
- %act = memref_cast %cC : !column_major_C to memref<*xf32>
- %exp = memref_cast %cC_ref : !column_major_C to memref<*xf32>
- %errors = call @verifyMemRefF32(%act, %exp) : (memref<*xf32>, memref<*xf32>) -> i64
- vector.print %errors : i64
- dealloc %cC_ref : !column_major_C
+ %res = load %cC[%c0, %c0]: !column_major_C
+ // CHECK: 64
+ vector.print %res: !elem_type_c
dealloc %cA : !column_major_A
dealloc %cB : !column_major_B
}
func private @rtclock() -> f64
-func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major_as_row_major anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major_as_row_major anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
+// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
+// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,16 vectorize" | \
// TODO: linalg.copy vectorization in the presence of permutation map fails. Enable when addressed.
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
-// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
func @main() {
%f0 = constant 0.0 : !elem_type_c
%f1 = constant 1.0 : !elem_type_a
-
+
%cA = alloc() : !column_major_A
%cB = alloc() : !column_major_B
%cC = alloc() : !column_major_C
-
+
linalg.fill(%cA, %f1) : !column_major_A, !elem_type_a
linalg.fill(%cB, %f1) : !column_major_B, !elem_type_b
linalg.fill(%cC, %f0) : !column_major_C, !elem_type_c
%tmatmul_column_major_as_row_major = subf %t_end_matmul_column_major_as_row_major, %t_start_matmul_column_major_as_row_major: f64
call @print_perf(%iters, %tmatmul_column_major_as_row_major) : (index, f64) -> ()
- // CHECK: {{^0}}
- %cC_ref = alloc() : !column_major_C
- linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c
- linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B)
- outs(%cC_ref: !column_major_C)
- %act1 = memref_cast %cC : !column_major_C to memref<*xf32>
- %exp1 = memref_cast %cC_ref : !column_major_C to memref<*xf32>
- %errors1 = call @verifyMemRefF32(%act1, %exp1) : (memref<*xf32>, memref<*xf32>) -> i64
- vector.print %errors1 : i64
- dealloc %cC_ref : !column_major_C
-
- // CHECK: {{^0}}
- %C_ref = alloc() : !row_major_C
- linalg.fill(%C_ref, %f0) : !row_major_C, !elem_type_c
- linalg.matmul ins(%A, %B : !row_major_A, !row_major_B)
- outs(%C_ref: !row_major_C)
- %act2 = memref_cast %C : !row_major_C to memref<*xf32>
- %exp2 = memref_cast %C_ref : !row_major_C to memref<*xf32>
- %errors2 = call @verifyMemRefF32(%act2, %exp2) : (memref<*xf32>, memref<*xf32>) -> i64
- vector.print %errors2 : i64
- dealloc %C_ref : !row_major_C
-
+ %res = load %cC[%c0, %c0]: !column_major_C
+ // CHECK: 64
+ vector.print %res: !elem_type_c
+ %res2 = load %C[%c0, %c0]: !row_major_C
+ // CHECK: 64
+ vector.print %res2: !elem_type_c
+
dealloc %A : !row_major_A
dealloc %B : !row_major_B
dealloc %C : !row_major_C
}
func private @rtclock() -> f64
-func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
-// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
%tmatmul = subf %t_end_matmul, %t_start_matmul: f64
call @print_perf(%iters, %tmatmul) : (index, f64) -> ()
- // CHECK: {{^0}}
- %C_ref = alloc() : !row_major_C
- linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c
- linalg.matmul_i8_i8_i32 ins(%A, %B : !row_major_A, !row_major_B)
- outs(%C_ref: !row_major_C)
- %res = memref_cast %C : !row_major_C to memref<*xi32>
- %exp = memref_cast %C_ref : !row_major_C to memref<*xi32>
- %errors = call @verifyMemRefI32(%res, %exp) : (memref<*xi32>, memref<*xi32>) -> i64
- vector.print %errors : i64
- dealloc %C_ref : !row_major_C
+ %res = load %C[%c0, %c0]: !row_major_C
+ // CHECK: 64
+ vector.print %res: !elem_type_c
dealloc %A : !row_major_A
dealloc %B : !row_major_B
}
func private @rtclock() -> f64
-func private @verifyMemRefI32(memref<*xi32>, memref<*xi32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)
_mlir_ciface_print_memref_4d_f32(StridedMemRefType<float, 4> *M) {
impl::printMemRef(*M);
}
-
-extern "C" int64_t
-_mlir_ciface_verifyMemRefI32(UnrankedMemRefType<int32_t> *actual,
- UnrankedMemRefType<int32_t> *expected) {
- return impl::verifyMemRef(*actual, *expected);
-}
-
-extern "C" int64_t
-_mlir_ciface_verifyMemRefF32(UnrankedMemRefType<float> *actual,
- UnrankedMemRefType<float> *expected) {
- return impl::verifyMemRef(*actual, *expected);
-}
-
-extern "C" int64_t
-_mlir_ciface_verifyMemRefF64(UnrankedMemRefType<double> *actual,
- UnrankedMemRefType<double> *expected) {
- return impl::verifyMemRef(*actual, *expected);
-}
-
-extern "C" int64_t verifyMemRefI32(int64_t rank, void *actualPtr,
- void *expectedPtr) {
- UnrankedMemRefType<int32_t> actualDesc = {rank, actualPtr};
- UnrankedMemRefType<int32_t> expectedDesc = {rank, expectedPtr};
- return _mlir_ciface_verifyMemRefI32(&actualDesc, &expectedDesc);
-}
-
-extern "C" int64_t verifyMemRefF32(int64_t rank, void *actualPtr,
- void *expectedPtr) {
- UnrankedMemRefType<float> actualDesc = {rank, actualPtr};
- UnrankedMemRefType<float> expectedDesc = {rank, expectedPtr};
- return _mlir_ciface_verifyMemRefF32(&actualDesc, &expectedDesc);
-}
-
-extern "C" int64_t verifyMemRefF64(int64_t rank, void *actualPtr,
- void *expectedPtr) {
- UnrankedMemRefType<double> actualDesc = {rank, actualPtr};
- UnrankedMemRefType<double> expectedDesc = {rank, expectedPtr};
- return _mlir_ciface_verifyMemRefF64(&actualDesc, &expectedDesc);
-}