From bb0bbed610d86ba155f9c066c23038f7f34e2dbb Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 23 Jun 2023 09:17:53 +0200 Subject: [PATCH] Fix bytecode reader/writer on big-endian platforms This makes the bytecode reader/writer work on big-endian platforms. The only problem was related to encoding of multi-byte integers, where both reader and writer code make implicit assumptions about endianness of the host platform. This fixes the current test failures on s390x, and in addition allows to remove the UNSUPPORTED markers from all other bytecode-related test cases - they now also all pass on s390x. Also adding a GFAIL_SKIP to the MultiModuleWithResource unit test, as this still fails due to an unrelated endian bug regarding decoding of external resources. Differential Revision: https://reviews.llvm.org/D153567 Reviewed By: mehdi_amini, jpienaar, rriddle --- mlir/lib/Bytecode/Reader/BytecodeReader.cpp | 18 ++++++++++++------ mlir/lib/Bytecode/Writer/BytecodeWriter.cpp | 7 +++++-- mlir/test/Bytecode/general.mlir | 3 --- .../test/Bytecode/invalid/invalid-dialect_section.mlir | 3 --- mlir/test/Bytecode/invalid/invalid-ir_section.mlir | 3 --- mlir/test/Bytecode/invalid/invalid-string_section.mlir | 3 --- mlir/test/Bytecode/invalid/invalid-structure.mlir | 3 --- .../invalid/invalid_attr_type_offset_section.mlir | 3 --- .../Bytecode/invalid/invalid_attr_type_section.mlir | 3 --- mlir/test/Bytecode/resources.mlir | 3 --- mlir/test/Bytecode/versioning/versioned_attr.mlir | 3 --- mlir/test/Bytecode/versioning/versioned_bytecode.mlir | 3 --- mlir/test/Bytecode/versioning/versioned_op.mlir | 3 --- mlir/test/Dialect/Builtin/Bytecode/attrs.mlir | 3 --- mlir/test/Dialect/Builtin/Bytecode/types.mlir | 3 --- mlir/test/Dialect/Quant/Bytecode/types.mlir | 3 --- mlir/unittests/Bytecode/BytecodeTest.cpp | 7 +++++++ 17 files changed, 24 insertions(+), 50 deletions(-) diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp index 177372c..5aa24ba 100644 --- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp +++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO: Support for big-endian architectures. - #include "mlir/Bytecode/BytecodeReader.h" #include "mlir/AsmParser/AsmParser.h" #include "mlir/Bytecode/BytecodeImplementation.h" @@ -27,6 +25,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/SourceMgr.h" @@ -203,8 +202,14 @@ public: // Handle the overwhelming uncommon case where the value required all 8 // bytes (i.e. a really really big number). In this case, the marker byte is // all zeros: `00000000`. - if (LLVM_UNLIKELY(result == 0)) - return parseBytes(sizeof(result), reinterpret_cast(&result)); + if (LLVM_UNLIKELY(result == 0)) { + llvm::support::ulittle64_t resultLE; + if (failed(parseBytes(sizeof(resultLE), + reinterpret_cast(&resultLE)))) + return failure(); + result = resultLE; + return success(); + } return parseMultiByteVarInt(result); } @@ -305,12 +310,13 @@ private: "unexpected number of trailing zeros in varint encoding"); // Parse in the remaining bytes of the value. - if (failed(parseBytes(numBytes, reinterpret_cast(&result) + 1))) + llvm::support::ulittle64_t resultLE(result); + if (failed(parseBytes(numBytes, reinterpret_cast(&resultLE) + 1))) return failure(); // Shift out the low-order bits that were used to mark how the value was // encoded. - result >>= (numBytes + 1); + result = resultLE >> (numBytes + 1); return success(); } diff --git a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp index 03c7a53..936117a 100644 --- a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp +++ b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Endian.h" #include #include #include @@ -538,7 +539,8 @@ void EncodingEmitter::emitMultiByteVarInt(uint64_t value) { if (LLVM_LIKELY(it >>= 7) == 0) { uint64_t encodedValue = (value << 1) | 0x1; encodedValue <<= (numBytes - 1); - emitBytes({reinterpret_cast(&encodedValue), numBytes}); + llvm::support::ulittle64_t encodedValueLE(encodedValue); + emitBytes({reinterpret_cast(&encodedValueLE), numBytes}); return; } } @@ -546,7 +548,8 @@ void EncodingEmitter::emitMultiByteVarInt(uint64_t value) { // If the value is too large to encode in a single byte, emit a special all // zero marker byte and splat the value directly. emitByte(0); - emitBytes({reinterpret_cast(&value), sizeof(value)}); + llvm::support::ulittle64_t valueLE(value); + emitBytes({reinterpret_cast(&valueLE), sizeof(valueLE)}); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Bytecode/general.mlir b/mlir/test/Bytecode/general.mlir index 180fb93..228072a 100644 --- a/mlir/test/Bytecode/general.mlir +++ b/mlir/test/Bytecode/general.mlir @@ -1,8 +1,5 @@ // RUN: mlir-opt -allow-unregistered-dialect -emit-bytecode %s | mlir-opt -allow-unregistered-dialect | FileCheck %s -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - // CHECK-LABEL: "bytecode.test1" // CHECK-NEXT: "unregistered.op"() {test_attr = #test.dynamic_singleton} : () -> () // CHECK-NEXT: "bytecode.empty"() : () -> () diff --git a/mlir/test/Bytecode/invalid/invalid-dialect_section.mlir b/mlir/test/Bytecode/invalid/invalid-dialect_section.mlir index 31558bd..0508c09 100644 --- a/mlir/test/Bytecode/invalid/invalid-dialect_section.mlir +++ b/mlir/test/Bytecode/invalid/invalid-dialect_section.mlir @@ -1,9 +1,6 @@ // This file contains various failure test cases related to the structure of // the dialect section. -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===--------------------------------------------------------------------===// // Dialect Name //===--------------------------------------------------------------------===// diff --git a/mlir/test/Bytecode/invalid/invalid-ir_section.mlir b/mlir/test/Bytecode/invalid/invalid-ir_section.mlir index e8ea9c3..be87e81 100644 --- a/mlir/test/Bytecode/invalid/invalid-ir_section.mlir +++ b/mlir/test/Bytecode/invalid/invalid-ir_section.mlir @@ -1,9 +1,6 @@ // This file contains various failure test cases related to the structure of // the IR section. -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===--------------------------------------------------------------------===// // Operations //===--------------------------------------------------------------------===// diff --git a/mlir/test/Bytecode/invalid/invalid-string_section.mlir b/mlir/test/Bytecode/invalid/invalid-string_section.mlir index 64ec9fc..a39017c 100644 --- a/mlir/test/Bytecode/invalid/invalid-string_section.mlir +++ b/mlir/test/Bytecode/invalid/invalid-string_section.mlir @@ -1,9 +1,6 @@ // This file contains various failure test cases related to the structure of // the string section. -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===--------------------------------------------------------------------===// // Count //===--------------------------------------------------------------------===// diff --git a/mlir/test/Bytecode/invalid/invalid-structure.mlir b/mlir/test/Bytecode/invalid/invalid-structure.mlir index 1bd4ac4..962dff3 100644 --- a/mlir/test/Bytecode/invalid/invalid-structure.mlir +++ b/mlir/test/Bytecode/invalid/invalid-structure.mlir @@ -1,9 +1,6 @@ // This file contains various failure test cases related to the structure of // a bytecode file. -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===--------------------------------------------------------------------===// // Version //===--------------------------------------------------------------------===// diff --git a/mlir/test/Bytecode/invalid/invalid_attr_type_offset_section.mlir b/mlir/test/Bytecode/invalid/invalid_attr_type_offset_section.mlir index 376a260..9a8bc08 100644 --- a/mlir/test/Bytecode/invalid/invalid_attr_type_offset_section.mlir +++ b/mlir/test/Bytecode/invalid/invalid_attr_type_offset_section.mlir @@ -1,9 +1,6 @@ // This file contains various failure test cases related to the structure of // the attribute/type offset section. -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===--------------------------------------------------------------------===// // Offset //===--------------------------------------------------------------------===// diff --git a/mlir/test/Bytecode/invalid/invalid_attr_type_section.mlir b/mlir/test/Bytecode/invalid/invalid_attr_type_section.mlir index 359cd0b..aba6b3f 100644 --- a/mlir/test/Bytecode/invalid/invalid_attr_type_section.mlir +++ b/mlir/test/Bytecode/invalid/invalid_attr_type_section.mlir @@ -1,9 +1,6 @@ // This file contains various failure test cases related to the structure of // the attribute/type offset section. -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===--------------------------------------------------------------------===// // Index //===--------------------------------------------------------------------===// diff --git a/mlir/test/Bytecode/resources.mlir b/mlir/test/Bytecode/resources.mlir index 187101a..33ed01d 100644 --- a/mlir/test/Bytecode/resources.mlir +++ b/mlir/test/Bytecode/resources.mlir @@ -1,8 +1,5 @@ // RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - // CHECK-LABEL: @TestDialectResources module @TestDialectResources attributes { // CHECK: bytecode.test = dense_resource : tensor<2xui32> diff --git a/mlir/test/Bytecode/versioning/versioned_attr.mlir b/mlir/test/Bytecode/versioning/versioned_attr.mlir index 0fd6c3c..8ee1a1b 100644 --- a/mlir/test/Bytecode/versioning/versioned_attr.mlir +++ b/mlir/test/Bytecode/versioning/versioned_attr.mlir @@ -1,9 +1,6 @@ // This file contains a test case representative of a dialect parsing an // attribute with versioned custom encoding. -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===--------------------------------------------------------------------===// // Test attribute upgrade //===--------------------------------------------------------------------===// diff --git a/mlir/test/Bytecode/versioning/versioned_bytecode.mlir b/mlir/test/Bytecode/versioning/versioned_bytecode.mlir index 6fcc383..30d2a32 100644 --- a/mlir/test/Bytecode/versioning/versioned_bytecode.mlir +++ b/mlir/test/Bytecode/versioning/versioned_bytecode.mlir @@ -1,8 +1,5 @@ // This file contains test cases related to roundtripping. -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===--------------------------------------------------------------------===// // Test roundtrip //===--------------------------------------------------------------------===// diff --git a/mlir/test/Bytecode/versioning/versioned_op.mlir b/mlir/test/Bytecode/versioning/versioned_op.mlir index 5fa170b..0a6ad42 100644 --- a/mlir/test/Bytecode/versioning/versioned_op.mlir +++ b/mlir/test/Bytecode/versioning/versioned_op.mlir @@ -1,9 +1,6 @@ // This file contains test cases related to the dialect post-parsing upgrade // mechanism. -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===--------------------------------------------------------------------===// // Test generic //===--------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Builtin/Bytecode/attrs.mlir b/mlir/test/Dialect/Builtin/Bytecode/attrs.mlir index 2702a15..208d331 100644 --- a/mlir/test/Dialect/Builtin/Bytecode/attrs.mlir +++ b/mlir/test/Dialect/Builtin/Bytecode/attrs.mlir @@ -1,8 +1,5 @@ // RUN: mlir-opt -emit-bytecode -allow-unregistered-dialect %s | mlir-opt -allow-unregistered-dialect -mlir-print-local-scope | FileCheck %s -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===----------------------------------------------------------------------===// // ArrayAttr //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Builtin/Bytecode/types.mlir b/mlir/test/Dialect/Builtin/Bytecode/types.mlir index f6d0fa7..bcfbf64 100644 --- a/mlir/test/Dialect/Builtin/Bytecode/types.mlir +++ b/mlir/test/Dialect/Builtin/Bytecode/types.mlir @@ -1,8 +1,5 @@ // RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===----------------------------------------------------------------------===// // ComplexType //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Quant/Bytecode/types.mlir b/mlir/test/Dialect/Quant/Bytecode/types.mlir index 669f8ed..359a585 100644 --- a/mlir/test/Dialect/Quant/Bytecode/types.mlir +++ b/mlir/test/Dialect/Quant/Bytecode/types.mlir @@ -1,8 +1,5 @@ // RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s -// Bytecode currently does not support big-endian platforms -// UNSUPPORTED: target=s390x-{{.*}} - //===----------------------------------------------------------------------===// // AnyQuantized //===----------------------------------------------------------------------===// diff --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp index 96cdbae..290233d 100644 --- a/mlir/unittests/Bytecode/BytecodeTest.cpp +++ b/mlir/unittests/Bytecode/BytecodeTest.cpp @@ -15,6 +15,7 @@ #include "mlir/Parser/Parser.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Endian.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -54,6 +55,12 @@ TEST(Bytecode, MultiModuleWithResource) { parseSourceString(ostream.str(), parseConfig); ASSERT_TRUE(roundTripModule); + // FIXME: Parsing external resources does not work on big-endian + // platforms currently. + if (llvm::support::endian::system_endianness() == + llvm::support::endianness::big) + GTEST_SKIP(); + // Try to see if we have a valid resource in the parsed module. auto checkResourceAttribute = [&](Operation *op) { Attribute attr = roundTripModule->getAttr("bytecode.test"); -- 2.7.4