[idl_parser] Track included files by hash (#6434)
authorMark Spatz <mspatz@leaflabs.com>
Mon, 1 Mar 2021 20:34:01 +0000 (14:34 -0600)
committerGitHub <noreply@github.com>
Mon, 1 Mar 2021 20:34:01 +0000 (12:34 -0800)
* [idl_gen] Delete ts::GenPrefixedImport()

I don't know what this is for, but it's the only piece of code external
to idl_parser.cpp that expects the key of Parser::included_files_ to be
a path. And it appears to be unused.

* [idl_parser] Track included files by hash

Parser::included_files_ is a map whose main purpose is to keep track of
which files have already been parsed in order to protect against
multiple inclusion. Its key is the path that the file was found at
during parsing (or, if it's an in-memory file, just its name).

This commit changes the key to be the 64 bit FNV-1a hash of the file's
name (just the name, not the complete path) xor'd with the hash of the
file's contents (unless it's an in-memory file, then we only hash the
name.)

This allows multiple include protection to function even in the face of
unique per-file include paths (fixes #6425).

* Ran tests/generate_code.sh

CI told me to do it.

* Gracefullt handle case where source_filename == nullptr

I just learned source_filename might also be null. In that case, we
should exclude it from the hash instead of hashing a zero length
string, just like we exclude source when it is null.

Presumably nameless files will never be included (they can't, can they?)
so this doesn't really matter, but I think it's prettier anyways.

include/flatbuffers/idl.h
src/idl_gen_ts.cpp
src/idl_parser.cpp
tests/monster_test_generated.rs

index 955ac3f..0e9a3bc 100644 (file)
@@ -997,7 +997,7 @@ class Parser : public ParserState {
   std::string file_identifier_;
   std::string file_extension_;
 
-  std::map<std::string, std::string> included_files_;
+  std::map<uint64_t, std::string> included_files_;
   std::map<std::string, std::set<std::string>> files_included_per_file_;
   std::vector<std::string> native_included_files_;
 
index 2ebf1f7..ee19167 100644 (file)
@@ -308,28 +308,6 @@ class TsGenerator : public BaseGenerator {
     return "NS" + NumToString(HashFnv1a<uint64_t>(file.c_str()));
   }
 
-  std::string GenPrefixedImport(const std::string &full_file_name,
-                                const std::string &base_name) {
-    // Either keep the include path as it was
-    // or use only the base_name + kGeneratedFileNamePostfix
-    std::string path;
-    if (parser_.opts.keep_include_path) {
-      auto it = parser_.included_files_.find(full_file_name);
-      FLATBUFFERS_ASSERT(it != parser_.included_files_.end());
-      path = flatbuffers::StripExtension(it->second) +
-             parser_.opts.filename_suffix;
-    } else {
-      path = base_name + parser_.opts.filename_suffix;
-    }
-
-    // Add the include prefix and make the path always relative
-    path = flatbuffers::ConCatPathFileName(parser_.opts.include_prefix, path);
-    path = std::string(".") + kPathSeparator + path;
-
-    return "import * as " + GenFileNamespacePrefix(full_file_name) +
-           " from \"" + path + "\";\n";
-  }
-
   // Adds a source-dependent prefix, for of import * statements.
   std::string GenPrefixedTypeName(const std::string &typeName,
                                   const std::string &file) {
index 0f48c84..8cfd04b 100644 (file)
@@ -3192,13 +3192,32 @@ CheckedError Parser::ParseRoot(const char *source, const char **include_paths,
   return NoError();
 }
 
+// Generate a unique hash for a file based on its name and contents (if any).
+static uint64_t HashFile(const char *source_filename, const char *source) {
+  uint64_t hash = 0;
+
+  if (source_filename)
+    hash = HashFnv1a<uint64_t>(StripPath(source_filename).c_str());
+
+  if (source && *source) hash ^= HashFnv1a<uint64_t>(source);
+
+  return hash;
+}
+
 CheckedError Parser::DoParse(const char *source, const char **include_paths,
                              const char *source_filename,
                              const char *include_filename) {
+  uint64_t source_hash = 0;
   if (source_filename) {
-    if (included_files_.find(source_filename) == included_files_.end()) {
-      included_files_[source_filename] =
-          include_filename ? include_filename : "";
+    // If the file is in-memory, don't include its contents in the hash as we
+    // won't be able to load them later.
+    if (FileExists(source_filename))
+      source_hash = HashFile(source_filename, source);
+    else
+      source_hash = HashFile(source_filename, nullptr);
+
+    if (included_files_.find(source_hash) == included_files_.end()) {
+      included_files_[source_hash] = include_filename ? include_filename : "";
       files_included_per_file_[source_filename] = std::set<std::string>();
     } else {
       return NoError();
@@ -3249,12 +3268,14 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths,
         return Error("unable to locate include file: " + name);
       if (source_filename)
         files_included_per_file_[source_filename].insert(filepath);
-      if (included_files_.find(filepath) == included_files_.end()) {
+
+      std::string contents;
+      bool file_loaded = LoadFile(filepath.c_str(), true, &contents);
+      if (included_files_.find(HashFile(filepath.c_str(), contents.c_str())) ==
+          included_files_.end()) {
         // We found an include file that we have not parsed yet.
-        // Load it and parse it.
-        std::string contents;
-        if (!LoadFile(filepath.c_str(), true, &contents))
-          return Error("unable to load include file: " + name);
+        // Parse it.
+        if (!file_loaded) return Error("unable to load include file: " + name);
         ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str(),
                        name.c_str()));
         // We generally do not want to output code for any included files:
@@ -3271,7 +3292,7 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths,
         // entered into included_files_.
         // This is recursive, but only go as deep as the number of include
         // statements.
-        if (source_filename) { included_files_.erase(source_filename); }
+        included_files_.erase(source_hash);
         return DoParse(source, include_paths, source_filename,
                        include_filename);
       }
index 0102c74..0d82b2f 100644 (file)
@@ -2,8 +2,8 @@
 
 
 
-use crate::include_test1_generated::*;
 use crate::include_test2_generated::*;
+use crate::include_test1_generated::*;
 use std::mem;
 use std::cmp::Ordering;
 
@@ -13,8 +13,8 @@ use self::flatbuffers::EndianScalar;
 #[allow(unused_imports, dead_code)]
 pub mod my_game {
 
-  use crate::include_test1_generated::*;
   use crate::include_test2_generated::*;
+  use crate::include_test1_generated::*;
   use std::mem;
   use std::cmp::Ordering;
 
@@ -127,8 +127,8 @@ impl InParentNamespaceT {
 #[allow(unused_imports, dead_code)]
 pub mod example_2 {
 
-  use crate::include_test1_generated::*;
   use crate::include_test2_generated::*;
+  use crate::include_test1_generated::*;
   use std::mem;
   use std::cmp::Ordering;
 
@@ -243,8 +243,8 @@ impl MonsterT {
 #[allow(unused_imports, dead_code)]
 pub mod example {
 
-  use crate::include_test1_generated::*;
   use crate::include_test2_generated::*;
+  use crate::include_test1_generated::*;
   use std::mem;
   use std::cmp::Ordering;