Utility for checking the encoding and line ending of source files (#5188)

author Vladimir Glavnyy <31897320+vglavnyy@users.noreply.github.com>

Tue, 19 Feb 2019 19:22:25 +0000 (02:22 +0700)

committer Wouter van Oortmerssen <aardappel@gmail.com>

Tue, 19 Feb 2019 19:22:25 +0000 (20:22 +0100)
author Vladimir Glavnyy <31897320+vglavnyy@users.noreply.github.com>
Tue, 19 Feb 2019 19:22:25 +0000 (02:22 +0700)
committer Wouter van Oortmerssen <aardappel@gmail.com>
Tue, 19 Feb 2019 19:22:25 +0000 (20:22 +0100)
diff --git a/.travis.yml b/.travis.yml

index b6ab995..23c4c23 100644 (file)
--- a/.travis.yml
+++ b/.travis.yml
@@ -89,6 +89,7 @@ matrix:
        - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo ln -s -v -f $(which gcc-$GCC_VERSION) /usr/bin/gcc; fi
  
        script:
+      - bash .travis/check-sources.sh
        - bash grpc/build_grpc.sh
        - cmake . 
          -DCMAKE_BUILD_TYPE=$BUILD_TYPE 
diff --git a/.travis/check-sources.sh b/.travis/check-sources.sh

new file mode 100644 (file)

index 0000000..3e6dbf1
--- /dev/null
+++ b/.travis/check-sources.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Copyright 2018 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+
+if [ -n "$1" ]; then
+  scan_dir="$1"
+else
+  scan_dir="$( pwd )"
+fi
+
+py_checker="$0.py"
+
+echo "scan root directory = '$scan_dir'"
+python3 --version
+# Scan recursively and search all *.cpp and *.h files using regex patterns.
+# Assumes the script is run from the root of the FlatBuffers working dir.
+python3 $py_checker "ascii" "$scan_dir/include" "\.h$"
+python3 $py_checker "ascii" "$scan_dir/src"     "\.cpp$"
+python3 $py_checker "ascii" "$scan_dir/tests"   "\.h$"
+python3 $py_checker "utf-8" "$scan_dir/tests"   "\.cpp$"
diff --git a/.travis/check-sources.sh.py b/.travis/check-sources.sh.py

new file mode 100644 (file)

index 0000000..5ad060c
--- /dev/null
+++ b/.travis/check-sources.sh.py
@@ -0,0 +1,36 @@
+import os
+import re
+import sys
+
+def check_encoding(encoding, scan_dir, regex_pattern):
+  fname = None
+  try:
+    assert encoding in ['ascii', 'utf-8'], "unexpected encoding"
+    cmp = re.compile(regex_pattern)
+    for root, dirs, files in os.walk(scan_dir):
+      fname = root
+      cmp_list = [f for f in files if cmp.search(f) is not None]
+      for f in cmp_list:
+        fname = os.path.join(root, f)
+        with open(fname, mode='rb') as test_file:
+          btext = test_file.read()
+        # check encoding
+        btext.decode(encoding=encoding, errors="strict")
+        if encoding == "utf-8" and btext.startswith(b'\xEF\xBB\xBF'):
+          raise ValueError("unexpected BOM in file")
+        # reject any lone CR: every '\r' must be part of a '\r\n' pair
+        LF = btext.count(b'\r')
+        CRLF = btext.count(b'\r\n')
+        assert LF >= CRLF, "CRLF logic error"
+        if CRLF != LF:
+          raise ValueError("CRLF violation: found {} LF characters".format(LF - CRLF))
+  except Exception as err:
+    print("ERROR with [{}]: {}".format(fname, err))
+    return -1
+  else:
+    return 0
+
+if __name__ == "__main__":
+  # python check-sources.sh.py 'ascii' '.' '.*\.(cpp|h)$'
+  res = check_encoding(sys.argv[1], sys.argv[2], sys.argv[3])
+  sys.exit(0 if res == 0 else -1)
diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp

index 32a9a72..a9e207e 100644 (file)
--- a/src/idl_parser.cpp
+++ b/src/idl_parser.cpp
@@ -428,7 +428,7 @@ CheckedError Parser::Next() {
  
          auto dot_lvl = (c == '.') ? 0 : 1;  // dot_lvl==0 <=> exactly one '.' seen
          if (!dot_lvl && !is_digit(*cursor_)) return NoError(); // enum?
-        // Parser accepts hexadecimal-ﬂoating-literal (see C++ 5.13.4).
+        // Parser accepts hexadecimal-floating-literal (see C++ 5.13.4).
          if (is_digit(c) || has_sign || !dot_lvl) {
            const auto start = cursor_ - 1;
            auto start_digits = !is_digit(c) ? cursor_ : cursor_ - 1;
diff --git a/tests/test.cpp b/tests/test.cpp

index a1a10ba..a18983f 100644 (file)
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -1,4 +1,4 @@
-/*
+/*
   * Copyright 2014 Google Inc. All rights reserved.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
author	Vladimir Glavnyy <31897320+vglavnyy@users.noreply.github.com>
	Tue, 19 Feb 2019 19:22:25 +0000 (02:22 +0700)
committer	Wouter van Oortmerssen <aardappel@gmail.com>
	Tue, 19 Feb 2019 19:22:25 +0000 (20:22 +0100)
.travis.yml		patch \| blob \| history
.travis/check-sources.sh	[new file with mode: 0644]	patch \| blob
.travis/check-sources.sh.py	[new file with mode: 0644]	patch \| blob
src/idl_parser.cpp		patch \| blob \| history
tests/test.cpp		patch \| blob \| history