From 5da7bda826a98fa92eb1356907afa631bfa9c1b1 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Thu, 31 Jul 2014 15:11:03 -0700 Subject: [PATCH] File identifier feature. Allows you to add, and test for the presence of a magic 4-char string in a FlatBuffer. Tested: on OS X. Change-Id: I090692a9e4fb53bed3543279a28563e67132cba0 --- .gitignore | 1 + docs/html/md__cpp_usage.html | 2 +- docs/html/md__schemas.html | 10 +++++++++- docs/source/CppUsage.md | 2 +- docs/source/Schemas.md | 37 +++++++++++++++++++++++++++++++++++++ include/flatbuffers/flatbuffers.h | 21 +++++++++++++++++++-- include/flatbuffers/idl.h | 2 ++ src/flatc.cpp | 3 ++- src/idl_gen_cpp.cpp | 20 +++++++++++++++++++- src/idl_parser.cpp | 29 +++++++++++++++++++++++++++-- tests/monster_test.fbs | 3 +++ tests/monster_test_generated.h | 4 ++++ tests/test.cpp | 4 +++- 13 files changed, 128 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index dd94593..7959fe8 100755 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ flatsamplebinary flatsampletext snapshot.sh tests/go_gen +CMakeLists.txt.user diff --git a/docs/html/md__cpp_usage.html b/docs/html/md__cpp_usage.html index 50a0a13..9a74910 100644 --- a/docs/html/md__cpp_usage.html +++ b/docs/html/md__cpp_usage.html @@ -74,7 +74,7 @@ mb.add_name(name); mb.add_inventory(inventory); auto mloc = mb.Finish();

We start with a temporary helper class MonsterBuilder (which is defined in our generated code also), then call the various add_ methods to set fields, and Finish to complete the object. This is pretty much the same code as you find inside CreateMonster, except we're leaving out a few fields. Fields may also be added in any order, though orderings that place fields of the same size adjacent to each other are the most efficient in size, due to alignment. You should not nest these Builder classes (serialize your data in pre-order).

-

Regardless of whether you used CreateMonster or MonsterBuilder, you now have an offset to the root of your data, and you can finish the buffer using:

fbb.Finish(mloc);
+

Regardless of whether you used CreateMonster or MonsterBuilder, you now have an offset to the root of your data, and you can finish the buffer using:

FinishMonsterBuffer(fbb, mloc);
 

The buffer is now ready to be stored somewhere, sent over the network, compressed, or whatever you'd like to do with it. You can access the start of the buffer with fbb.GetBufferPointer(), and its size from fbb.GetSize().

samples/sample_binary.cpp is a complete code sample similar to the code above, that also includes the reading code below.

Reading in C++

diff --git a/docs/html/md__schemas.html b/docs/html/md__schemas.html index 93bf4c3..9ac82eb 100644 --- a/docs/html/md__schemas.html +++ b/docs/html/md__schemas.html @@ -113,7 +113,15 @@ root_type Monster;

These will generate the corresponding namespace in C++ for all helper code, and packages in Java. You can use . to specify nested namespaces / packages.

Root type

This declares what you consider to be the root table (or struct) of the serialized data. This is particularly important for parsing JSON data, which doesn't include object type information.

-

Comments & documentation

+

File identification and extension

+

Typically, a FlatBuffer binary buffer is not self-describing, i.e. it needs you to know its schema to parse it correctly. But if you want to use a FlatBuffer as a file format, it would be convenient to be able to have a "magic number" in there, like most file formats have, to be able to do a sanity check to see if you're reading the kind of file you're expecting.

+

Now, you can always prefix a FlatBuffer with your own file header, but FlatBuffers has a built-in way to add an identifier to a FlatBuffer that takes up minimal space, and keeps the buffer compatible with buffers that don't have such an identifier.

+

You can specify in a schema, similar to root_type, that you intend for this type of FlatBuffer to be used as a file format:

file_identifier "MYFI";
+

Identifiers must always be exactly 4 characters long. These 4 characters will end up as bytes at offsets 4-7 (inclusive) in the buffer.

+

For any schema that has such an identifier, flatc will automatically add the identifier to any binaries it generates (with -b), and generated calls like FinishMonsterBuffer also add the identifier. If you have specified an identifier and wish to generate a buffer without one, you can always still do so by calling FlatBufferBuilder::Finish explicitly.

+

After loading a buffer, you can use a call like MonsterBufferHasIdentifier to check if the identifier is present.

+

Additionally, by default flatc will output binary files as .bin. This declaration in the schema will change that to whatever you want:

file_extension "ext";
+

Comments & documentation

May be written as in most C-based languages. Additionally, a triple comment (///) on a line by itself signals that a comment is documentation for whatever is declared on the line after it (table/struct/field/enum/union/element), and the comment is output in the corresponding C++ code. Multiple such lines per item are allowed.

Attributes

Attributes may be attached to a declaration, behind a field, or after the name of a table/struct/enum/union. These may either have a value or not. Some attributes like deprecated are understood by the compiler, others are simply ignored (like priority), but are available to query if you parse the schema at runtime. This is useful if you write your own code generators/editors etc., and you wish to add additional information specific to your tool (such as a help text).

diff --git a/docs/source/CppUsage.md b/docs/source/CppUsage.md index 418b1f9..550c2c5 100755 --- a/docs/source/CppUsage.md +++ b/docs/source/CppUsage.md @@ -88,7 +88,7 @@ Regardless of whether you used `CreateMonster` or `MonsterBuilder`, you now have an offset to the root of your data, and you can finish the buffer using: - fbb.Finish(mloc); + FinishMonsterBuffer(fbb, mloc); The buffer is now ready to be stored somewhere, sent over the network, be compressed, or whatever you'd like to do with it. You can access the diff --git a/docs/source/Schemas.md b/docs/source/Schemas.md index 30f1862..a9fa621 100755 --- a/docs/source/Schemas.md +++ b/docs/source/Schemas.md @@ -147,6 +147,43 @@ This declares what you consider to be the root table (or struct) of the serialized data. This is particular important for parsing JSON data, which doesn't include object type information. +### File identification and extension + +Typically, a FlatBuffer binary buffer is not self-describing, i.e. it +needs you to know its schema to parse it correctly. But if you +want to use a FlatBuffer as a file format, it would be convenient +to be able to have a "magic number" in there, like most file formats +have, to be able to do a sanity check to see if you're reading the +kind of file you're expecting. + +Now, you can always prefix a FlatBuffer with your own file header, +but FlatBuffers has a built-in way to add an identifier to a +FlatBuffer that takes up minimal space, and keeps the buffer +compatible with buffers that don't have such an identifier. + +You can specify in a schema, similar to `root_type`, that you intend +for this type of FlatBuffer to be used as a file format: + + file_identifier "MYFI"; + +Identifiers must always be exactly 4 characters long. These 4 characters +will end up as bytes at offsets 4-7 (inclusive) in the buffer. 
+ +For any schema that has such an identifier, `flatc` will automatically +add the identifier to any binaries it generates (with `-b`), +and generated calls like `FinishMonsterBuffer` also add the identifier. +If you have specified an identifier and wish to generate a buffer +without one, you can always still do so by calling +`FlatBufferBuilder::Finish` explicitly. + +After loading a buffer, you can use a call like +`MonsterBufferHasIdentifier` to check if the identifier is present. + +Additionally, by default `flatc` will output binary files as `.bin`. +This declaration in the schema will change that to whatever you want: + + file_extension "ext"; + ### Comments & documentation May be written as in most C-based languages. Additionally, a triple diff --git a/include/flatbuffers/flatbuffers.h b/include/flatbuffers/flatbuffers.h index 5234c4f..0a0ef17 100644 --- a/include/flatbuffers/flatbuffers.h +++ b/include/flatbuffers/flatbuffers.h @@ -613,10 +613,21 @@ class FlatBufferBuilder { return CreateVectorOfStructs(v.data(), v.size()); } + static const int kFileIdentifierLength = 4; + // Finish serializing a buffer by writing the root offset. - template void Finish(Offset root) { + // If a file_identifier is given, the buffer will be prefix with a standard + // FlatBuffers file header. + template void Finish(Offset root, + const char *file_identifier = nullptr) { // This will cause the whole buffer to be aligned. - PreAlign(sizeof(uoffset_t), minalign_); + PreAlign(sizeof(uoffset_t) + (file_identifier ? kFileIdentifierLength : 0), + minalign_); + if (file_identifier) { + assert(strlen(file_identifier) == kFileIdentifierLength); + buf_.push(reinterpret_cast(file_identifier), + kFileIdentifierLength); + } PushElement(ReferTo(root.o)); // Location of root. } @@ -649,6 +660,12 @@ template const T *GetRoot(const void *buf) { EndianScalar(*reinterpret_cast(buf))); } +// Helper to see if the identifier in a buffer has the expected value. 
+inline bool BufferHasIdentifier(const void *buf, const char *identifier) { + return strncmp(reinterpret_cast(buf) + 4, identifier, + FlatBufferBuilder::kFileIdentifierLength) == 0; +} + // Helper class to verify the integrity of a FlatBuffer class Verifier { public: diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h index ac07d8c..9ac1148 100644 --- a/include/flatbuffers/idl.h +++ b/include/flatbuffers/idl.h @@ -284,6 +284,8 @@ class Parser { FlatBufferBuilder builder_; // any data contained in the file StructDef *root_struct_def; + std::string file_identifier_; + std::string file_extension_; private: const char *source_, *cursor_; diff --git a/src/flatc.cpp b/src/flatc.cpp index 8efef1c..97cd699 100755 --- a/src/flatc.cpp +++ b/src/flatc.cpp @@ -27,9 +27,10 @@ bool GenerateBinary(const Parser &parser, const std::string &path, const std::string &file_name, const GeneratorOptions & /*opts*/) { + auto ext = parser.file_extension_.length() ? parser.file_extension_ : "bin"; return !parser.builder_.GetSize() || flatbuffers::SaveFile( - (path + file_name + ".bin").c_str(), + (path + file_name + "." + ext).c_str(), reinterpret_cast(parser.builder_.GetBufferPointer()), parser.builder_.GetSize(), true); diff --git a/src/idl_gen_cpp.cpp b/src/idl_gen_cpp.cpp index 1d6a9ad..6bcbdb5 100644 --- a/src/idl_gen_cpp.cpp +++ b/src/idl_gen_cpp.cpp @@ -466,18 +466,36 @@ std::string GenerateCPP(const Parser &parser, const std::string &include_guard_i code += decl_code; code += enum_code_post; - // Generate convenient root datatype accessor, and root verifier. 
+ // Generate convenient global helper functions: if (parser.root_struct_def) { + // The root datatype accessor: code += "inline const " + parser.root_struct_def->name + " *Get"; code += parser.root_struct_def->name; code += "(const void *buf) { return flatbuffers::GetRoot<"; code += parser.root_struct_def->name + ">(buf); }\n\n"; + // The root verifier: code += "inline bool Verify"; code += parser.root_struct_def->name; code += "Buffer(const flatbuffers::Verifier &verifier) { " "return verifier.VerifyBuffer<"; code += parser.root_struct_def->name + ">(); }\n\n"; + + // Finish a buffer with a given root object: + code += "inline void Finish" + parser.root_struct_def->name; + code += "Buffer(flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset<"; + code += parser.root_struct_def->name + "> root) { fbb.Finish(root"; + if (parser.file_identifier_.length()) + code += ", \"" + parser.file_identifier_ + "\""; + code += "); }\n\n"; + + if (parser.file_identifier_.length()) { + // Check if a buffer has the identifier. + code += "inline bool " + parser.root_struct_def->name; + code += "BufferHasIdentifier(const void *buf) { return flatbuffers::"; + code += "BufferHasIdentifier(buf, \"" + parser.file_identifier_; + code += "\"); }\n\n"; + } } // Close the namespaces. diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp index b7cb614..74d9887 100644 --- a/src/idl_parser.cpp +++ b/src/idl_parser.cpp @@ -81,7 +81,9 @@ template<> inline Offset atot>(const char *s) { TD(Enum, 263, "enum") \ TD(Union, 264, "union") \ TD(NameSpace, 265, "namespace") \ - TD(RootType, 266, "root_type") + TD(RootType, 266, "root_type") \ + TD(FileIdentifier, 267, "file_identifier") \ + TD(FileExtension, 268, "file_extension") #ifdef __GNUC__ __extension__ // Stop GCC complaining about trailing comma with -Wpendantic. 
#endif @@ -194,6 +196,14 @@ void Parser::Next() { if (attribute_ == "union") { token_ = kTokenUnion; return; } if (attribute_ == "namespace") { token_ = kTokenNameSpace; return; } if (attribute_ == "root_type") { token_ = kTokenRootType; return; } + if (attribute_ == "file_identifier") { + token_ = kTokenFileIdentifier; + return; + } + if (attribute_ == "file_extension") { + token_ = kTokenFileExtension; + return; + } // If not, it is a user-defined identifier: token_ = kTokenIdentifier; return; @@ -802,11 +812,26 @@ bool Parser::Parse(const char *source) { Next(); auto root_type = attribute_; Expect(kTokenIdentifier); - Expect(';'); if (!SetRootType(root_type.c_str())) Error("unknown root type: " + root_type); if (root_struct_def->fixed) Error("root type must be a table"); + Expect(';'); + } else if (token_ == kTokenFileIdentifier) { + Next(); + file_identifier_ = attribute_; + Expect(kTokenStringConstant); + if (file_identifier_.length() != + FlatBufferBuilder::kFileIdentifierLength) + Error("file_identifier must be exactly " + + NumToString(FlatBufferBuilder::kFileIdentifierLength) + + " characters"); + Expect(';'); + } else if (token_ == kTokenFileExtension) { + Next(); + file_extension_ = attribute_; + Expect(kTokenStringConstant); + Expect(';'); } else { ParseDecl(); } diff --git a/tests/monster_test.fbs b/tests/monster_test.fbs index caf6946..bdf4aae 100755 --- a/tests/monster_test.fbs +++ b/tests/monster_test.fbs @@ -36,3 +36,6 @@ table Monster { } root_type Monster; + +file_identifier "MONS"; +file_extension "mon"; diff --git a/tests/monster_test_generated.h b/tests/monster_test_generated.h index aed7ea2..42dc418 100755 --- a/tests/monster_test_generated.h +++ b/tests/monster_test_generated.h @@ -187,6 +187,10 @@ inline const Monster *GetMonster(const void *buf) { return flatbuffers::GetRoot< inline bool VerifyMonsterBuffer(const flatbuffers::Verifier &verifier) { return verifier.VerifyBuffer(); } +inline void 
FinishMonsterBuffer(flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset root) { fbb.Finish(root, "MONS"); } + +inline bool MonsterBufferHasIdentifier(const void *buf) { return flatbuffers::BufferHasIdentifier(buf, "MONS"); } + } // namespace Example } // namespace MyGame diff --git a/tests/test.cpp b/tests/test.cpp index f860897..e10367c 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -93,7 +93,7 @@ std::string CreateFlatBufferTest() { Any_Monster, mloc2.Union(), // Store a union. testv, vecofstrings, vecoftables, 0); - builder.Finish(mloc); + FinishMonsterBuffer(builder, mloc); #ifdef FLATBUFFERS_TEST_VERBOSE // print byte data for debugging: @@ -116,6 +116,8 @@ void AccessFlatBufferTest(const std::string &flatbuf) { flatbuf.length()); TEST_EQ(VerifyMonsterBuffer(verifier), true); + TEST_EQ(MonsterBufferHasIdentifier(flatbuf.c_str()), true); + // Access the buffer from the root. auto monster = GetMonster(flatbuf.c_str()); -- 2.7.4