Add docker/lint.sh, for running dockerized lint scripts locally (#6333)
authorAndrew Reusch <areusch@octoml.ai>
Fri, 28 Aug 2020 16:27:09 +0000 (09:27 -0700)
committerGitHub <noreply@github.com>
Fri, 28 Aug 2020 16:27:09 +0000 (09:27 -0700)
* Add -i option to docker/bash.sh

 * Allows scripts to invoke dockerized commands interactively, for
   better Ctrl+C.

* Add docker/lint.sh to run lint step locally in the docker VM.

 * This allows developers to run lint using the official versions of
   the lint tools without needing to look up the docker image name.
 * Move all lint scripts to tests/lint/
 * Point Makefile to those new scripts.
 * Update apache rat script to filter untracked/gitignore'd files when
   run with `docker/lint.sh`.

* fix bash_source[0]

* explicitly set the author for CI

* try environment variable override

* try config option

* remove =traditional from ignored option to increase git compat

* address comments, fix behavior under git worktrees

* address cppdocs comments

* address lint.sh comments

* address zhi comments, update pull_request rst

15 files changed:
Jenkinsfile
Makefile
docker/bash.sh
docker/dev_common.sh [new file with mode: 0644]
docker/lint.sh [new file with mode: 0755]
docs/contribute/pull_request.rst
tests/lint/check_asf_header.sh [new file with mode: 0755]
tests/lint/clang_format.sh [new file with mode: 0755]
tests/lint/cppdocs.sh [new file with mode: 0755]
tests/lint/cpplint.sh [new file with mode: 0755]
tests/lint/filter_untracked.py [new file with mode: 0644]
tests/lint/jnilint.sh [new file with mode: 0755]
tests/lint/pylint.sh [new file with mode: 0755]
tests/python/unittest/test_filter_untracked.py [new file with mode: 0644]
tests/scripts/task_lint.sh

index 77b4e4b..49e73ff 100644 (file)
 //
 //
 
+// NOTE: these lines are scanned by docker/dev_common.sh. Please update the regex as needed. -->
 ci_lint = "tvmai/ci-lint:v0.61"
 ci_gpu = "tvmai/ci-gpu:v0.64"
 ci_cpu = "tvmai/ci-cpu:v0.65"
 ci_wasm = "tvmai/ci-wasm:v0.60"
 ci_i386 = "tvmai/ci-i386:v0.52"
+// <--- End of regex-scanned config.
 
 // tvm libraries
 tvm_runtime = "build/libtvm_runtime.so, build/config.cmake"
index 825e589..9823c5c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -75,15 +75,13 @@ $(OUTPUTDIR)/libtvm_web_runtime.js: $(OUTPUTDIR)/libtvm_web_runtime.bc
        emcc $(EMCC_FLAGS) -o $@ $(OUTPUTDIR)/libtvm_web_runtime.bc
 
 # Lint scripts
+# NOTE: lint scripts that are executed in the CI should be in tests/lint. This allows docker/lint.sh
+# to behave similarly to the CI.
 cpplint:
-       python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src
-       python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp \
-        include src \
-        examples/extension/src examples/graph_executor/src
+       tests/lint/cpplint.sh
 
 pylint:
-       python3 -m pylint python/tvm --rcfile=$(ROOTDIR)/tests/lint/pylintrc
-       python3 -m pylint vta/python/vta --rcfile=$(ROOTDIR)/tests/lint/pylintrc
+       tests/lint/pylint.sh
 
 jnilint:
        python3 3rdparty/dmlc-core/scripts/lint.py tvm4j-jni cpp jvm/native/src
index 73bfb12..0d67198 100755 (executable)
 # Usage: docker/bash.sh <CONTAINER_NAME>
 #     Starts an interactive session
 #
-# Usage2: docker/bash.sh <CONTAINER_NAME> [COMMAND]
-#     Execute command in the docker image, non-interactive
+# Usage2: docker/bash.sh [-i] <CONTAINER_NAME> [COMMAND]
+#     Execute command in the docker image, default non-interactive
+#     With -i, execute interactively.
 #
+interactive=0
+if [ "$1" == "-i" ]; then
+    interactive=1
+    shift
+fi
+
 if [ "$#" -lt 1 ]; then
-    echo "Usage: docker/bash.sh <CONTAINER_NAME> [COMMAND]"
+    echo "Usage: docker/bash.sh [-i] <CONTAINER_NAME> [COMMAND]"
     exit -1
 fi
 
 DOCKER_IMAGE_NAME=("$1")
 
+CI_DOCKER_EXTRA_PARAMS=( )
 if [ "$#" -eq 1 ]; then
     COMMAND="bash"
+    interactive=1
     if [[ $(uname) == "Darwin" ]]; then
         # Docker's host networking driver isn't supported on macOS.
         # Use default bridge network and expose port for jupyter notebook.
-        CI_DOCKER_EXTRA_PARAMS=("-it -p 8888:8888")
+        CI_DOCKER_EXTRA_PARAMS=( "${CI_DOCKER_EXTRA_PARAMS[@]}" "-p 8888:8888" )
     else
-        CI_DOCKER_EXTRA_PARAMS=("-it --net=host")
+        CI_DOCKER_EXTRA_PARAMS=( "${CI_DOCKER_EXTRA_PARAMS[@]}" "--net=host" )
     fi
 else
     shift 1
     COMMAND=("$@")
 fi
 
+if [ $interactive -eq 1 ]; then
+    CI_DOCKER_EXTRA_PARAMS=( "${CI_DOCKER_EXTRA_PARAMS[@]}" -it )
+fi
+
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 WORKSPACE="$(pwd)"
 
@@ -107,7 +120,7 @@ ${DOCKER_BINARY} run --rm --pid=host\
     -e "CI_PYTEST_ADD_OPTIONS=$CI_PYTEST_ADD_OPTIONS" \
     ${CI_PY_ENV} \
     ${CUDA_ENV} \
-    ${CI_DOCKER_EXTRA_PARAMS[@]} \
+    "${CI_DOCKER_EXTRA_PARAMS[@]}" \
     ${DOCKER_IMAGE_NAME} \
     bash --login /docker/with_the_same_user \
     ${COMMAND[@]}
diff --git a/docker/dev_common.sh b/docker/dev_common.sh
new file mode 100644 (file)
index 0000000..559a664
--- /dev/null
@@ -0,0 +1,44 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+if [ -z "${BASH_SOURCE[0]}" ]; then
+    echo "NOTE: This script must be source'd from another bash script--it cannot be run directly"
+    exit 2
+fi
+
+INVOCATION_PWD="$(pwd)"
+
+# Locate the git toplevel from this file's directory; quoted so paths containing spaces work.
+GIT_TOPLEVEL=$(cd "$(dirname "${BASH_SOURCE[0]}")" && git rev-parse --show-toplevel)
+
+
+# run_docker <jenkinsfile_var> [command...]: run command via docker/bash.sh -i in that image.
+function run_docker() {
+    local image_name="$1" image_spec  # image_name: name of the Jenkinsfile var to find
+    shift
+
+    image_spec=$(grep -E "^${image_name} = " "${GIT_TOPLEVEL}/Jenkinsfile" | \
+                     sed -E "s/${image_name} = \"([^\"]*)\"/\1/")
+    if [ -z "${image_spec}" ]; then
+        echo "${image_name}: not found in ${GIT_TOPLEVEL}/Jenkinsfile" >&2
+        exit 2
+    fi
+
+    "${GIT_TOPLEVEL}/docker/bash.sh" -i "${image_spec}" "$@"
+}
diff --git a/docker/lint.sh b/docker/lint.sh
new file mode 100755 (executable)
index 0000000..82bfdf3
--- /dev/null
@@ -0,0 +1,78 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Load the helpers in dev_common.sh; $0 is quoted so paths containing spaces work.
+source "$(dirname "$0")/dev_common.sh"
+
+DEFAULT_STEPS=( file_type asf cpplint clang_format pylint jnilint cppdocs )
+
+# run_lint_step [--validate-only] <step>: run one lint step in the ci_lint image; exit 2 if unknown.
+function run_lint_step() {
+    local validate_only=0 cmd
+    if [ "$1" == "--validate-only" ]; then
+        validate_only=1
+        shift
+    fi
+    case "$1" in
+        file_type)
+            cmd=( python3 tests/lint/check_file_type.py )
+            ;;
+        asf)
+            cmd=( tests/lint/check_asf_header.sh --local )
+            ;;
+        clang_format)
+            cmd=( tests/lint/clang_format.sh )
+            ;;
+        cpplint)
+            cmd=( tests/lint/cpplint.sh )
+            ;;
+        pylint)
+            cmd=( tests/lint/pylint.sh )
+            ;;
+        jnilint)
+            cmd=( tests/lint/jnilint.sh )
+            ;;
+        cppdocs)
+            cmd=( tests/lint/cppdocs.sh )
+            ;;
+        *)
+            echo "error: don't know how to run lint step: $1" >&2
+            echo "available lint steps: ${DEFAULT_STEPS[*]}" >&2
+            exit 2
+            ;;
+    esac
+
+    if [ $validate_only -eq 0 ]; then
+        run_docker "ci_lint" "${cmd[@]}"
+    fi
+}
+
+if [ $# -eq 0 ]; then
+    # NOTE: matches order in tests/scripts/task_lint.sh
+    steps=( "${DEFAULT_STEPS[@]}" )
+else
+    steps=( "$@" )
+fi
+
+for step in "${steps[@]}"; do
+    run_lint_step --validate-only "$step"
+done
+
+for step in "${steps[@]}"; do
+    run_lint_step "$step"
+done
index 128ae80..935f2d5 100644 (file)
@@ -32,8 +32,22 @@ This is a quick guide to submit a pull request, please also refer to the detaile
 
   .. code:: bash
 
-    # Reproduce the lint procedure in the CI.
+    # Run all lint steps.
+    docker/lint.sh
+
+    # To run steps individually, specify their step names on the command-line. An incorrectly
+    # spelled step name causes the tool to print all available steps.
+    docker/lint.sh <step_name> ...
+
+    # While the lint commands used should be identical to those run in CI, this command reproduces
+    # the CI lint procedure exactly (typically helpful for debugging lint script errors).
     docker/bash.sh tvmai/ci-lint ./tests/scripts/task_lint.sh
+
+  When the clang-format lint check fails, run git-clang-format as follows to automatically reformat
+  your code:
+
+  .. code:: bash
+
     # Run clang-format check for all the files that changed since upstream/master
     docker/bash.sh tvmai/ci-lint ./tests/lint/git-clang-format.sh upstream/master
 
diff --git a/tests/lint/check_asf_header.sh b/tests/lint/check_asf_header.sh
new file mode 100755 (executable)
index 0000000..cd5fe64
--- /dev/null
@@ -0,0 +1,56 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+rat_tempdir="$(mktemp -d)"
+
+function cleanup() {
+    rm -rf "${rat_tempdir}"
+}
+trap cleanup EXIT
+
+rat_output="${rat_tempdir}/$$.apache-rat.txt"
+
+filter_untracked=0
+if [ "$1" == "--local" ]; then
+    filter_untracked=1
+fi
+
+java -jar /bin/apache-rat.jar -E tests/lint/rat-excludes  -d . | (grep -E "^== File" >"${rat_output}" || true)
+
+# Rat can't be configured to ignore untracked files, so filter them (abort if the filter fails).
+if [ ${filter_untracked} -eq 1 ]; then
+    echo "NOTE: --local flag present, filtering untracked files"
+    processed_rat_output="${rat_output}-processed"
+    sed 's/^== File: //g' "${rat_output}" | python3 "$(dirname "$0")/filter_untracked.py" | \
+        sed 's/^/== File: /g' >"${processed_rat_output}"
+    [ "${PIPESTATUS[1]}" -eq 0 ] || { echo "error: filter_untracked.py failed" >&2; exit 2; }
+    rat_output="${processed_rat_output}"
+fi
+
+if grep --quiet -E "File" "${rat_output}"; then
+    echo "Need to add ASF header to the following files."
+    echo "----------------File List----------------"
+    cat "${rat_output}"
+    echo "-----------------------------------------"
+    echo "Use the following steps to add the headers:"
+    echo "- Create file_list.txt in your text editor"
+    echo "- Copy paste the above content in file-list into file_list.txt"
+    echo "- python3 tests/lint/add_asf_header.py file_list.txt"
+    exit 1
+fi
diff --git a/tests/lint/clang_format.sh b/tests/lint/clang_format.sh
new file mode 100755 (executable)
index 0000000..de6711b
--- /dev/null
@@ -0,0 +1,23 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# check latest change, for squash merge into master
+./tests/lint/git-clang-format.sh HEAD~1
+# check against origin/master for PRs.
+./tests/lint/git-clang-format.sh origin/master
diff --git a/tests/lint/cppdocs.sh b/tests/lint/cppdocs.sh
new file mode 100755 (executable)
index 0000000..e453b7b
--- /dev/null
@@ -0,0 +1,33 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# Lint `make doc`: its stderr is kept in a private mktemp dir (removed on exit), not /tmp/$$.*.
+log_dir="$(mktemp -d)"
+function cleanup() { rm -rf "${log_dir}"; }
+trap cleanup EXIT
+
+make doc 2>"${log_dir}/log.txt"
+
+grep -v -E "ENABLE_PREPROCESSING|unsupported tag" < "${log_dir}/log.txt" > "${log_dir}/logclean.txt" || true
+echo "---------Error Log----------"
+cat "${log_dir}/logclean.txt"
+echo "----------------------------"
+if grep --quiet -E "warning|error" < "${log_dir}/logclean.txt"; then
+    exit 1
+fi
diff --git a/tests/lint/cpplint.sh b/tests/lint/cpplint.sh
new file mode 100755 (executable)
index 0000000..8836ee4
--- /dev/null
@@ -0,0 +1,23 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src
+python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp \
+        include src \
+        examples/extension/src examples/graph_executor/src
diff --git a/tests/lint/filter_untracked.py b/tests/lint/filter_untracked.py
new file mode 100644 (file)
index 0000000..df21c2d
--- /dev/null
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import os.path
+import subprocess
+import sys
+
+
+def check_output(args, **kw):
+    """Run `args` and return its stdout decoded as UTF-8; exit this process with code 2 on failure.
+
+    Extra keyword arguments are forwarded to subprocess.Popen.
+    """
+    proc = subprocess.Popen(args, **kw, stdout=subprocess.PIPE)
+    out, _ = proc.communicate()
+    if proc.returncode:
+        sys.stderr.write('exited with code %d: %s\n' % (proc.returncode, ' '.join(args)))
+        sys.exit(2)
+    return str(out, 'utf-8')
+
+
+def main():
+    script_dir = os.path.dirname(__file__) or os.getcwd()
+    toplevel_dir = check_output(['git', 'rev-parse', '--show-toplevel'], cwd=script_dir).strip('\n')
+    # NOTE(review): this comment originally justified passing --ignore-submodules (to avoid
+    # problems when a git worktree is mounted in the docker VM at a different path than on the
+    # host), but the command below does not pass that flag -- confirm which is intended. Anyhow,
+    # it shouldn't be necessary to filter untracked files in submodules here.
+    git_status_output = check_output(['git', 'status', '-s', '--ignored'],
+                                     cwd=toplevel_dir)
+    untracked = [line[3:]
+                 for line in git_status_output.split('\n')
+                 if line.startswith('?? ') or line.startswith('!! ')]
+
+    # also add .git in case rat picks up files in .git or the .git file (if a worktree).
+    toplevel_git_dentry = os.path.join(toplevel_dir, '.git')
+    if os.path.isfile(toplevel_git_dentry):
+        untracked.append('.git')
+    else:
+        untracked.append('.git/')
+
+    for line in sys.stdin:
+        cleaned_line = line
+        if line[:2] == './':
+            cleaned_line = line[2:]
+        cleaned_line = cleaned_line.strip('\n')
+        if any((cleaned_line.startswith(u) if u[-1] == '/' else cleaned_line == u)
+               for u in untracked):
+            continue
+
+        sys.stdout.write(line)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/tests/lint/jnilint.sh b/tests/lint/jnilint.sh
new file mode 100755 (executable)
index 0000000..39b8048
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+python3 3rdparty/dmlc-core/scripts/lint.py tvm4j-jni cpp jvm/native/src
diff --git a/tests/lint/pylint.sh b/tests/lint/pylint.sh
new file mode 100755 (executable)
index 0000000..a96c267
--- /dev/null
@@ -0,0 +1,21 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Lint both Python packages with the pylintrc next to this script (quoted for paths with spaces).
+python3 -m pylint python/tvm --rcfile="$(dirname "$0")/pylintrc"
+python3 -m pylint vta/python/vta --rcfile="$(dirname "$0")/pylintrc"
diff --git a/tests/python/unittest/test_filter_untracked.py b/tests/python/unittest/test_filter_untracked.py
new file mode 100644 (file)
index 0000000..73b0eef
--- /dev/null
@@ -0,0 +1,177 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+
+
+def setup_git_repo(worktree=False):
+  git_repo_dir = tempfile.mkdtemp()
+  to_rm = [git_repo_dir]
+  try:
+      subprocess.check_output(['git', 'init', '.'], cwd=git_repo_dir)
+
+      with open(f'{git_repo_dir}/committed', 'w') as committed_f:
+          committed_f.write('normal committed file\n')
+
+      subprocess.check_output(['git', 'add', 'committed'], cwd=git_repo_dir)
+
+      with open(f'{git_repo_dir}/committed-ignored', 'w') as gitignore_f:
+          gitignore_f.write('this file is gitignored, but committed already')
+
+      subprocess.check_output(['git', 'add', 'committed-ignored'], cwd=git_repo_dir)
+
+      with open(f'{git_repo_dir}/.gitignore', 'w') as gitignore_f:
+          gitignore_f.write('ignored\n'
+                            'committed-ignored\n')
+
+      subprocess.check_output(['git', 'add', '.gitignore'], cwd=git_repo_dir)
+
+      # NOTE: explicitly set the author so this test passes in the CI.
+      subprocess.check_output(['git',
+                               '-c', 'user.name=Unit Test',
+                               '-c', 'user.email=unit.test@testing.tvm.ai',
+                               'commit', '-m', 'initial commit'],
+                              cwd=git_repo_dir)
+
+      if worktree:
+        worktree_dir = tempfile.mkdtemp()
+        to_rm.append(worktree_dir)
+        subprocess.check_output(['git', 'worktree', 'add', worktree_dir], cwd=git_repo_dir)
+        git_repo_dir = worktree_dir
+
+      with open(f'{git_repo_dir}/ignored', 'w') as gitignore_f:
+          gitignore_f.write('this file is gitignored')
+
+      with open(f'{git_repo_dir}/added-to-index', 'w') as added_f:
+          added_f.write('only added to git index\n')
+
+      subprocess.check_output(['git', 'add', 'added-to-index'], cwd=git_repo_dir)
+
+      with open(f'{git_repo_dir}/ignored-added-to-index', 'w') as ignored_f:
+          ignored_f.write('this file is gitignored but in the index already\n')
+
+      subprocess.check_output(['git', 'add', '-f', 'ignored-added-to-index'], cwd=git_repo_dir)
+
+      with open(f'{git_repo_dir}/untracked', 'w') as untracked_f:
+          untracked_f.write('this file is untracked\n')
+
+      os.mkdir(f'{git_repo_dir}/subdir')
+      with open(f'{git_repo_dir}/subdir/untracked', 'w') as untracked_f:
+          untracked_f.write('this file is untracked\n')
+
+      with open(f'{git_repo_dir}/subdir/untracked2', 'w') as untracked_f:
+          untracked_f.write('this file is also untracked\n')
+
+      return git_repo_dir, to_rm
+
+  except Exception:
+      for rm_dir in to_rm:
+          shutil.rmtree(rm_dir)
+      raise
+
+
+def run_test(repo_path, passed_files, filtered_files):
+    """Pipe each path (bare and './'-prefixed) through filter_untracked.py and check what survives."""
+    test_input = '\n'.join(
+        passed_files +
+        filtered_files +
+        [f'./{f}' for f in passed_files] +
+        [f'./{f}' for f in filtered_files]) + '\n'
+
+    test_script_dir = f'{repo_path}/test-script-dir'
+    os.mkdir(test_script_dir)
+    filter_script_path = f'{test_script_dir}/filter_untracked.py'
+    test_script_dirname = os.path.dirname(__file__) or os.getcwd()
+    shutil.copy(os.path.realpath(f'{test_script_dirname}/../../lint/filter_untracked.py'),
+                filter_script_path)
+    filter_proc = subprocess.Popen(
+        [sys.executable, filter_script_path],
+        cwd=repo_path,
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        encoding='utf-8')
+    filter_output, _ = filter_proc.communicate(test_input)
+    filter_output_lines = [l for l in filter_output.split('\n') if l]
+
+    for pass_f in passed_files:
+        assert pass_f in filter_output_lines, (
+            f'expected in filter output: {pass_f}\nfilter output: {filter_output}')
+        assert f'./{pass_f}' in filter_output_lines, (
+            f'expected in filter output: ./{pass_f}\nfilter output: {filter_output}')
+
+    for filter_f in filtered_files:
+        assert filter_f not in filter_output_lines, (
+            f'expected not in filter output: {filter_f}\nfilter_output: {filter_output}')
+        assert f'./{filter_f}' not in filter_output_lines, (
+            f'expected not in filter output: ./{filter_f}\nfilter_output: {filter_output}')
+
+    assert len(filter_output_lines) == 2 * len(passed_files), (
+        f'expected {len(filter_output_lines)} == 2 * {len(passed_files)}')
+
+
+def test_filter_untracked():
+    repo_path, to_rm = setup_git_repo()
+    try:
+        passed_files = [
+            'committed',
+            'committed-ignored',
+            'added-to-index',
+            'ignored-added-to-index',
+        ]
+        filtered_files = [
+            'ignored',
+            'untracked',
+            'subdir/untracked',
+            'subdir/untracked2',
+        ]
+        run_test(repo_path, passed_files, filtered_files)
+
+    finally:
+        for rm_dir in to_rm:
+            shutil.rmtree(rm_dir)
+
+
+def test_worktree():
+    repo_path, to_rm = setup_git_repo(worktree=True)
+    try:
+        passed_files = [
+            'committed',
+            'committed-ignored',
+            'added-to-index',
+            'ignored-added-to-index',
+        ]
+        filtered_files = [
+            'ignored',
+            'untracked',
+            'subdir/untracked',
+            'subdir/untracked2',
+            '.git',
+        ]
+        run_test(repo_path, passed_files, filtered_files)
+
+    finally:
+        for rm_dir in to_rm:
+            shutil.rmtree(rm_dir)
+
+
+if __name__ == '__main__':
+    test_filter_untracked()
+    test_worktree()
index df1d055..7ac0611 100755 (executable)
@@ -31,40 +31,18 @@ echo "Check file types..."
 python3 tests/lint/check_file_type.py
 
 echo "Check ASF license header..."
-java -jar /bin/apache-rat.jar -E tests/lint/rat-excludes  -d . | (grep "== File" > /tmp/$$.apache-rat.txt || true)
-if grep --quiet -E "File" /tmp/$$.apache-rat.txt; then
-    echo "Need to add ASF header to the following files."
-    echo "----------------File List----------------"
-    cat /tmp/$$.apache-rat.txt
-    echo "-----------------------------------------"
-    echo "Use the following steps to add the headers:"
-    echo "- Create file_list.txt in your text editor"
-    echo "- Copy paste the above content in file-list into file_list.txt"
-    echo "- python3 tests/lint/add_asf_header.py file_list.txt"
-    exit 1
-fi
+tests/lint/check_asf_header.sh
 
 echo "Check codestyle of c++ code..."
-make cpplint
+tests/lint/cpplint.sh
 
 echo "clang-format check..."
-# check lastest change, for squash merge into master
-./tests/lint/git-clang-format.sh HEAD~1
-# chekc against origin/master for PRs.
-./tests/lint/git-clang-format.sh origin/master
+tests/lint/clang_format.sh
 
 echo "Check codestyle of python code..."
-make pylint
+tests/lint/pylint.sh
 echo "Check codestyle of jni code..."
-make jnilint
+tests/lint/jnilint.sh
 
 echo "Check documentations of c++ code..."
-make doc 2>/tmp/$$.log.txt
-
-grep -v -E "ENABLE_PREPROCESSING|unsupported tag" < /tmp/$$.log.txt > /tmp/$$.logclean.txt || true
-echo "---------Error Log----------"
-cat /tmp/$$.logclean.txt
-echo "----------------------------"
-if grep --quiet -E "warning|error" < /tmp/$$.logclean.txt; then
-    exit 1
-fi
+tests/lint/cppdocs.sh