This patch extends the runner utils with verification methods that compare two memrefs. The methods compare the contents of the two memrefs and print success if the data is identical up to a small numerical error. They are meant to simplify the development of integration tests that compare their results against a reference implementation (cf. the updates to the linalg matmul integration tests).
Originally landed in 5fa893c (https://reviews.llvm.org/D96326) and reverted in dd719fd due to a Windows build failure.
Changes:
- Remove the max function that requires the "algorithm" header on Windows
- Eliminate the truncation warning in the float specialization of verifyElem by using a float constant
Reviewed By: Kayjukh
Differential Revision: https://reviews.llvm.org/D96593
#endif // _WIN32
#include <assert.h>
+#include <cmath>
#include <iostream>
#include "mlir/ExecutionEngine/CRunnerUtils.h"
template <typename T, int M, int... Dims>
std::ostream &operator<<(std::ostream &os, const Vector<T, M, Dims...> &v);
-template <int... Dims> struct StaticSizeMult {
+template <int... Dims>
+struct StaticSizeMult {
static constexpr int value = 1;
};
-template <int N, int... Dims> struct StaticSizeMult<N, Dims...> {
+template <int N, int... Dims>
+struct StaticSizeMult<N, Dims...> {
static constexpr int value = N * StaticSizeMult<Dims...>::value;
};
}
}
-template <typename T, int M, int... Dims> struct VectorDataPrinter {
+template <typename T, int M, int... Dims>
+struct VectorDataPrinter {
static void print(std::ostream &os, const Vector<T, M, Dims...> &val);
};
std::cout << "Unranked Memref ";
printMemRef(DynamicMemRefType<T>(M));
}
+
+/// Verify the result of two computations are equivalent up to a small
+/// numerical error and return the number of errors.
+template <typename T>
+struct MemRefDataVerifier {
+ /// Maximum number of errors printed by the verifier.
+ static constexpr int printLimit = 10;
+
+ /// Verify the relative difference of the values is smaller than epsilon.
+ // NOTE(review): only meaningful for floating-point T; integral types use
+ // exact comparison via verifyElem below.
+ static bool verifyRelErrorSmallerThan(T actual, T expected, T epsilon);
+
+ /// Verify the values are equivalent (integers) or are close (floating-point).
+ static bool verifyElem(T actual, T expected);
+
+ /// Verify the data element-by-element and return the number of errors.
+ // `dim` is the number of dimensions left to walk; `sizes`/`strides` point
+ // at the current (outermost remaining) dimension. `printCounter` caps the
+ // number of mismatches written to `os` at `printLimit`.
+ static int64_t verify(std::ostream &os, T *actualBasePtr, T *expectedBasePtr,
+ int64_t dim, int64_t offset, const int64_t *sizes,
+ const int64_t *strides, int64_t &printCounter);
+};
+
+template <typename T>
+bool MemRefDataVerifier<T>::verifyRelErrorSmallerThan(T actual, T expected,
+ T epsilon) {
+ // Return an error if one of the values is infinite or NaN.
+ if (!std::isfinite(actual) || !std::isfinite(expected))
+ return false;
+ // Return true if the relative error is smaller than epsilon.
+ // Note: when `expected` is zero the right-hand side collapses to zero, so
+ // only an exact match is accepted in that case.
+ T delta = std::abs(actual - expected);
+ return (delta <= epsilon * std::abs(expected));
+}
+
+template <typename T>
+bool MemRefDataVerifier<T>::verifyElem(T actual, T expected) {
+ // Generic (integer) case: require exact equality. Floating-point types are
+ // handled by the float/double specializations, which allow a small relative
+ // error.
+ return actual == expected;
+}
+
+template <>
+inline bool MemRefDataVerifier<double>::verifyElem(double actual,
+ double expected) {
+ // Double-precision comparison with a relative tolerance of 1e-12.
+ return verifyRelErrorSmallerThan(actual, expected, 1e-12);
+}
+
+template <>
+inline bool MemRefDataVerifier<float>::verifyElem(float actual,
+ float expected) {
+ // Single-precision comparison with a relative tolerance of 1e-6. The float
+ // literal (1e-6f) avoids the double-to-float truncation warning mentioned
+ // in the commit message.
+ return verifyRelErrorSmallerThan(actual, expected, 1e-6f);
+}
+
+template <typename T>
+int64_t MemRefDataVerifier<T>::verify(std::ostream &os, T *actualBasePtr,
+ T *expectedBasePtr, int64_t dim,
+ int64_t offset, const int64_t *sizes,
+ const int64_t *strides,
+ int64_t &printCounter) {
+ int64_t errors = 0;
+ // Verify the elements at the current offset.
+ if (dim == 0) {
+ if (!verifyElem(actualBasePtr[offset], expectedBasePtr[offset])) {
+ // Report the mismatch, but only up to printLimit times per run; all
+ // mismatches are still counted.
+ if (printCounter < printLimit) {
+ os << actualBasePtr[offset] << " != " << expectedBasePtr[offset]
+ << " offset = " << offset << "\n";
+ printCounter++;
+ }
+ errors++;
+ }
+ } else {
+ // Iterate the current dimension and verify recursively.
+ // The recursion advances sizes/strides by one so the callee sees its own
+ // dimension at index 0, and accumulates offset via the current stride.
+ for (int64_t i = 0; i < sizes[0]; ++i) {
+ errors +=
+ verify(os, actualBasePtr, expectedBasePtr, dim - 1,
+ offset + i * strides[0], sizes + 1, strides + 1, printCounter);
+ }
+ }
+ return errors;
+}
+
+/// Verify the equivalence of two dynamic memrefs and return the number of
+/// errors or -1 if the shapes of the memrefs do not match.
+template <typename T>
+int64_t verifyMemRef(const DynamicMemRefType<T> &actual,
+ const DynamicMemRefType<T> &expected) {
+ // Check if the memref shapes match.
+ // NOTE(review): all checks sit inside a loop bounded by actual.rank, so a
+ // rank or offset mismatch is not detected when actual.rank is 0 — confirm
+ // whether rank-0 memrefs need a separate up-front check.
+ for (int64_t i = 0; i < actual.rank; ++i) {
+ if (expected.rank != actual.rank || actual.offset != expected.offset ||
+ actual.sizes[i] != expected.sizes[i] ||
+ actual.strides[i] != expected.strides[i]) {
+ // On mismatch, dump both descriptors to stderr for debugging and bail.
+ printMemRefMetaData(std::cerr, actual);
+ printMemRefMetaData(std::cerr, expected);
+ return -1;
+ }
+ }
+ // Return the number of errors.
+ int64_t printCounter = 0;
+ return MemRefDataVerifier<T>::verify(
+ std::cerr, actual.basePtr, expected.basePtr, actual.rank, actual.offset,
+ actual.sizes, actual.strides, printCounter);
+}
+
+/// Verify the equivalence of two unranked memrefs and return the number of
+/// errors or -1 if the shapes of the memrefs do not match.
+template <typename T>
+int64_t verifyMemRef(UnrankedMemRefType<T> &actual,
+ UnrankedMemRefType<T> &expected) {
+ // Adapt the unranked descriptors to DynamicMemRefType and reuse the ranked
+ // verification path.
+ return verifyMemRef(DynamicMemRefType<T>(actual),
+ DynamicMemRefType<T>(expected));
+}
+
} // namespace impl
////////////////////////////////////////////////////////////////////////////////
_mlir_ciface_print_memref_vector_4x4xf32(
StridedMemRefType<Vector2D<4, 4, float>, 2> *M);
+extern "C" MLIR_RUNNERUTILS_EXPORT int64_t _mlir_ciface_verifyMemRefI32(
+ UnrankedMemRefType<int32_t> *actual, UnrankedMemRefType<int32_t> *expected);
+extern "C" MLIR_RUNNERUTILS_EXPORT int64_t _mlir_ciface_verifyMemRefF32(
+ UnrankedMemRefType<float> *actual, UnrankedMemRefType<float> *expected);
+extern "C" MLIR_RUNNERUTILS_EXPORT int64_t _mlir_ciface_verifyMemRefF64(
+ UnrankedMemRefType<double> *actual, UnrankedMemRefType<double> *expected);
+
+extern "C" MLIR_RUNNERUTILS_EXPORT int64_t verifyMemRefI32(int64_t rank,
+ void *actualPtr,
+ void *expectedPtr);
+extern "C" MLIR_RUNNERUTILS_EXPORT int64_t verifyMemRefF32(int64_t rank,
+ void *actualPtr,
+ void *expectedPtr);
+extern "C" MLIR_RUNNERUTILS_EXPORT int64_t verifyMemRefF64(int64_t rank,
+ void *actualPtr,
+ void *expectedPtr);
+
#endif // EXECUTIONENGINE_RUNNERUTILS_H_
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
+// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,32 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
%tmatmul = subf %t_end_matmul, %t_start_matmul: f64
call @print_perf(%iters, %tmatmul) : (index, f64) -> ()
- %res = load %C[%c0, %c0]: !row_major_C
- // CHECK: 64
- vector.print %res: f32
+ // CHECK: {{^0$}}
+ %C_ref = alloc() : !row_major_C
+ linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c
+ linalg.matmul ins(%A, %B : !row_major_A, !row_major_B)
+ outs(%C_ref: !row_major_C)
+ %act = memref_cast %C : !row_major_C to memref<*xf32>
+ %exp = memref_cast %C_ref : !row_major_C to memref<*xf32>
+ %errors = call @verifyMemRefF32(%act, %exp) : (memref<*xf32>, memref<*xf32>) -> i64
+ vector.print %errors : i64
+ dealloc %C_ref : !row_major_C
dealloc %A : !row_major_A
dealloc %B : !row_major_B
}
func private @rtclock() -> f64
+func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
+// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,16 vectorize" | \
// TODO: linalg.copy vectorization in the presence of permutation map fails. Enable when addressed.
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
%tmatmul_column_major = subf %t_end_matmul_column_major, %t_start_matmul_column_major: f64
call @print_perf(%iters, %tmatmul_column_major) : (index, f64) -> ()
- %res = load %cC[%c0, %c0]: !column_major_C
- // CHECK: 64
- vector.print %res: !elem_type_c
+ // CHECK: {{^0$}}
+ %cC_ref = alloc() : !column_major_C
+ linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c
+ linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B)
+ outs(%cC_ref: !column_major_C)
+ %act = memref_cast %cC : !column_major_C to memref<*xf32>
+ %exp = memref_cast %cC_ref : !column_major_C to memref<*xf32>
+ %errors = call @verifyMemRefF32(%act, %exp) : (memref<*xf32>, memref<*xf32>) -> i64
+ vector.print %errors : i64
+ dealloc %cC_ref : !column_major_C
dealloc %cA : !column_major_A
dealloc %cB : !column_major_B
}
func private @rtclock() -> f64
+func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
-// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
+// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major_as_row_major anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
+// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major_as_row_major anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,16 vectorize" | \
// TODO: linalg.copy vectorization in the presence of permutation map fails. Enable when addressed.
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
%tmatmul_column_major_as_row_major = subf %t_end_matmul_column_major_as_row_major, %t_start_matmul_column_major_as_row_major: f64
call @print_perf(%iters, %tmatmul_column_major_as_row_major) : (index, f64) -> ()
- %res = load %cC[%c0, %c0]: !column_major_C
- // CHECK: 64
- vector.print %res: !elem_type_c
- %res2 = load %C[%c0, %c0]: !row_major_C
- // CHECK: 64
- vector.print %res2: !elem_type_c
+ // CHECK: {{^0$}}
+ %cC_ref = alloc() : !column_major_C
+ linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c
+ linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B)
+ outs(%cC_ref: !column_major_C)
+ %act1 = memref_cast %cC : !column_major_C to memref<*xf32>
+ %exp1 = memref_cast %cC_ref : !column_major_C to memref<*xf32>
+ %errors1 = call @verifyMemRefF32(%act1, %exp1) : (memref<*xf32>, memref<*xf32>) -> i64
+ vector.print %errors1 : i64
+ dealloc %cC_ref : !column_major_C
+
+ // CHECK: {{^0$}}
+ %C_ref = alloc() : !row_major_C
+ linalg.fill(%C_ref, %f0) : !row_major_C, !elem_type_c
+ linalg.matmul ins(%A, %B : !row_major_A, !row_major_B)
+ outs(%C_ref: !row_major_C)
+ %act2 = memref_cast %C : !row_major_C to memref<*xf32>
+ %exp2 = memref_cast %C_ref : !row_major_C to memref<*xf32>
+ %errors2 = call @verifyMemRefF32(%act2, %exp2) : (memref<*xf32>, memref<*xf32>) -> i64
+ vector.print %errors2 : i64
+ dealloc %C_ref : !row_major_C
dealloc %A : !row_major_A
dealloc %B : !row_major_B
}
func private @rtclock() -> f64
+func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
%tmatmul = subf %t_end_matmul, %t_start_matmul: f64
call @print_perf(%iters, %tmatmul) : (index, f64) -> ()
- %res = load %C[%c0, %c0]: !row_major_C
- // CHECK: 64
- vector.print %res: !elem_type_c
+ // CHECK: {{^0$}}
+ %C_ref = alloc() : !row_major_C
+ linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c
+ linalg.matmul_i8_i8_i32 ins(%A, %B : !row_major_A, !row_major_B)
+ outs(%C_ref: !row_major_C)
+ %res = memref_cast %C : !row_major_C to memref<*xi32>
+ %exp = memref_cast %C_ref : !row_major_C to memref<*xi32>
+ %errors = call @verifyMemRefI32(%res, %exp) : (memref<*xi32>, memref<*xi32>) -> i64
+ vector.print %errors : i64
+ dealloc %C_ref : !row_major_C
dealloc %A : !row_major_A
dealloc %B : !row_major_B
}
func private @rtclock() -> f64
+func private @verifyMemRefI32(memref<*xi32>, memref<*xi32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)
_mlir_ciface_print_memref_4d_f32(StridedMemRefType<float, 4> *M) {
impl::printMemRef(*M);
}
+
+extern "C" int64_t
+_mlir_ciface_verifyMemRefI32(UnrankedMemRefType<int32_t> *actual,
+ UnrankedMemRefType<int32_t> *expected) {
+ // C-interface entry point (llvm.emit_c_interface); forwards to the
+ // templated element-wise verifier and returns the error count.
+ return impl::verifyMemRef(*actual, *expected);
+}
+
+extern "C" int64_t
+_mlir_ciface_verifyMemRefF32(UnrankedMemRefType<float> *actual,
+ UnrankedMemRefType<float> *expected) {
+ // C-interface entry point; forwards to the templated element-wise verifier
+ // (float elements are compared with a small relative tolerance).
+ return impl::verifyMemRef(*actual, *expected);
+}
+
+extern "C" int64_t
+_mlir_ciface_verifyMemRefF64(UnrankedMemRefType<double> *actual,
+ UnrankedMemRefType<double> *expected) {
+ // C-interface entry point; forwards to the templated element-wise verifier
+ // (double elements are compared with a small relative tolerance).
+ return impl::verifyMemRef(*actual, *expected);
+}
+
+extern "C" int64_t verifyMemRefI32(int64_t rank, void *actualPtr,
+ void *expectedPtr) {
+ // Wrap the rank and opaque descriptor pointers into unranked memref
+ // descriptors and dispatch to the C-interface implementation.
+ UnrankedMemRefType<int32_t> actualDesc = {rank, actualPtr};
+ UnrankedMemRefType<int32_t> expectedDesc = {rank, expectedPtr};
+ return _mlir_ciface_verifyMemRefI32(&actualDesc, &expectedDesc);
+}
+
+extern "C" int64_t verifyMemRefF32(int64_t rank, void *actualPtr,
+ void *expectedPtr) {
+ // Wrap the rank and opaque descriptor pointers into unranked memref
+ // descriptors and dispatch to the C-interface implementation.
+ UnrankedMemRefType<float> actualDesc = {rank, actualPtr};
+ UnrankedMemRefType<float> expectedDesc = {rank, expectedPtr};
+ return _mlir_ciface_verifyMemRefF32(&actualDesc, &expectedDesc);
+}
+
+extern "C" int64_t verifyMemRefF64(int64_t rank, void *actualPtr,
+ void *expectedPtr) {
+ // Wrap the rank and opaque descriptor pointers into unranked memref
+ // descriptors and dispatch to the C-interface implementation.
+ UnrankedMemRefType<double> actualDesc = {rank, actualPtr};
+ UnrankedMemRefType<double> expectedDesc = {rank, expectedPtr};
+ return _mlir_ciface_verifyMemRefF64(&actualDesc, &expectedDesc);
+}