Split Caffe2 CI into cmake-only and python builds (#15917)
author Jesse Hellemn <hellemn@fb.com>
Mon, 14 Jan 2019 23:10:49 +0000 (15:10 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Mon, 14 Jan 2019 23:20:44 +0000 (15:20 -0800)
Summary:
bypass-lint

- Change all Caffe2 builds to use setup.py instead of cmake
- Add a -cmake- Caffe2 build configuration that uses cmake and builds only the cpp bits
- Move skipIfCI logic out of the onnx test scripts and into the rest of the CI logic
- Remove the old PYTHONPATH/LD_LIBRARY_PATH/etc. env management
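
Roughly, the new dispatch in the CI build script works like this (a simplified
sketch of the .jenkins/caffe2/build.sh changes below, not the literal script;
ROOT_DIR and PYTHON come from .jenkins/caffe2/common.sh):

  # Flags are collected as plain foo=bar pairs; the cmake-only path converts
  # them to -Dfoo=bar, while the setup.py path exports them as env vars.
  build_args=("BUILD_BINARY=ON" "BUILD_TEST=ON" "USE_ZSTD=ON")

  if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
    # cmake-only build: cpp bits only, installed into /usr/local/caffe2
    build_args+=("BUILD_PYTHON=OFF")
    mkdir -p build_caffe2 && cd build_caffe2
    cmake "$ROOT_DIR" "${build_args[@]/#/-D}" && make install
  else
    # Python build: setup.py reads the same flags from the environment
    build_args+=("BUILD_PYTHON=ON")
    for build_arg in "${build_args[@]}"; do export "$build_arg"; done
    "$PYTHON" setup.py install --user
  fi
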
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15917

Reviewed By: orionr

Differential Revision: D13637583

Pulled By: pjh5

fbshipit-source-id: c5c5639db0251ba12b6e4b51b2ac3b26a8953153

14 files changed:
.circleci/config.yml
.jenkins/caffe2/build.sh
.jenkins/caffe2/common.sh
.jenkins/caffe2/test.sh
caffe2/python/operator_test/layer_norm_op_test.py
cmake/Dependencies.cmake
scripts/onnx/test.sh
setup.py
test/onnx/test_models.py
test/onnx/test_operators.py
test/onnx/test_pytorch_common.py
tools/build_pytorch_libs.sh
tools/setup_helpers/build.py
tools/setup_helpers/configure.py

index 65f6a94..fada881 100644 (file)
@@ -155,24 +155,14 @@ caffe2_linux_build_defaults: &caffe2_linux_build_defaults
       no_output_timeout: "1h"
       command: |
         set -e
-        # TODO: merge this into Caffe2 build.sh
         cat >/home/circleci/project/ci_build_script.sh <<EOL
         # =================== The following code will be executed inside Docker container ===================
         set -ex
+        export BUILD_ENVIRONMENT="$BUILD_ENVIRONMENT"
 
         # Reinitialize submodules
         git submodule sync && git submodule update -q --init --recursive
 
-        mkdir -p build
-
-        # Configure additional cmake arguments
-        cmake_args=()
-        cmake_args+=("$CMAKE_ARGS")
-
-        if [[ $BUILD_ENVIRONMENT == *aten* ]]; then
-          cmake_args+=("-DBUILD_ATEN=ON")
-        fi
-
         # conda must be added to the path for Anaconda builds (this location must be
         # the same as that in install_anaconda.sh used to build the docker image)
         if [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
@@ -180,17 +170,8 @@ caffe2_linux_build_defaults: &caffe2_linux_build_defaults
           sudo chown -R jenkins:jenkins '/opt/conda'
         fi
 
-        # set the env var for onnx build and test
-        if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
-          export INTEGRATED=1
-        fi
-
         # Build
-        if test -x ".jenkins/caffe2/build.sh"; then
-          ./.jenkins/caffe2/build.sh ${cmake_args[@]}
-        else
-          ./.jenkins/build.sh ${cmake_args[@]}
-        fi
+        ./.jenkins/caffe2/build.sh
 
         # Show sccache stats if it is running
         if pgrep sccache > /dev/null; then
@@ -210,7 +191,11 @@ caffe2_linux_build_defaults: &caffe2_linux_build_defaults
 
         # Push intermediate Docker image for next phase to use
         if [ -z "${BUILD_ONLY}" ]; then
-          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-${CIRCLE_SHA1}
+          if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
+            export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-cmake-${CIRCLE_SHA1}
+          else
+            export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-${CIRCLE_SHA1}
+          fi
           docker commit "$id" ${COMMIT_DOCKER_IMAGE}
           docker push ${COMMIT_DOCKER_IMAGE}
         fi
@@ -231,42 +216,24 @@ caffe2_linux_test_defaults: &caffe2_linux_test_defaults
         # =================== The following code will be executed inside Docker container ===================
         set -ex
 
+        export BUILD_ENVIRONMENT="$BUILD_ENVIRONMENT"
+
         # libdc1394 (dependency of OpenCV) expects /dev/raw1394 to exist...
         sudo ln /dev/null /dev/raw1394
 
-        # Hotfix, use hypothesis 3.44.6 on Ubuntu 14.04
-        # See comments on https://github.com/HypothesisWorks/hypothesis-python/commit/eadd62e467d6cee6216e71b391951ec25b4f5830
-        if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
-          sudo pip -q uninstall -y hypothesis
-          # "pip install hypothesis==3.44.6" from official server is unreliable on CircleCI, so we host a copy on S3 instead
-          sudo pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
-          sudo pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
-          sudo pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
-        fi
-
         # conda must be added to the path for Anaconda builds (this location must be
         # the same as that in install_anaconda.sh used to build the docker image)
         if [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
           export PATH=/opt/conda/bin:$PATH
         fi
 
-        # set the env var for onnx build and test
-        if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
-          export INTEGRATED=1
-        fi
-
         # Upgrade SSL module to avoid old SSL warnings
         pip -q install --user --upgrade pyOpenSSL ndg-httpsclient pyasn1
 
         pip -q install --user -b /tmp/pip_install_onnx "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
-        pip -q install --user future
 
         # Build
-        if test -x ".jenkins/caffe2/test.sh"; then
-          ./.jenkins/caffe2/test.sh
-        else
-          ./.jenkins/test.sh
-        fi
+        ./.jenkins/caffe2/test.sh
 
         # Remove benign core dumps.
         # These are tests for signal handling (including SIGABRT).
@@ -276,7 +243,11 @@ caffe2_linux_test_defaults: &caffe2_linux_test_defaults
         EOL
         chmod +x /home/circleci/project/ci_test_script.sh
 
-        export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-${CIRCLE_SHA1}
+        if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
+          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-cmake-${CIRCLE_SHA1}
+        else
+          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-${CIRCLE_SHA1}
+        fi
         echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
         docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
         if [ -n "${CUDA_VERSION}" ]; then
@@ -1197,6 +1168,23 @@ jobs:
     resource_class: gpu.medium
     <<: *caffe2_linux_test_defaults
 
+  caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_build:
+    environment:
+      JOB_BASE_NAME: caffe2-cmake-cuda9.0-cudnn7-ubuntu16.04-build
+      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:238"
+      CUDA_VERSION: "9"
+      BUILD_ENVIRONMENT: "cmake-cuda9.0-cudnn7-ubuntu16.04"
+    <<: *caffe2_linux_build_defaults
+
+  caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_test:
+    environment:
+      JOB_BASE_NAME: caffe2-cmake-cuda9.0-cudnn7-ubuntu16.04-test
+      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:238"
+      CUDA_VERSION: "9"
+      BUILD_ENVIRONMENT: "cmake-cuda9.0-cudnn7-ubuntu16.04"
+    resource_class: gpu.medium
+    <<: *caffe2_linux_test_defaults
+
   caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build:
     environment:
       JOB_BASE_NAME: caffe2-py2-cuda9.1-cudnn7-ubuntu16.04-build
@@ -2971,10 +2959,10 @@ workflows:
       - pytorch_macos_10_13_cuda9_2_cudnn7_py3_build
 
       # Caffe2 builds
-      - caffe2_py2_cuda8_0_cudnn6_ubuntu16_04_build
-      - caffe2_py2_cuda8_0_cudnn6_ubuntu16_04_test:
+      - caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_build
+      - caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_test:
           requires:
-            - caffe2_py2_cuda8_0_cudnn6_ubuntu16_04_build
+            - caffe2_cmake_cuda9_0_cudnn7_ubuntu16_04_build
       - caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_build
       - caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_test:
           requires:
@@ -2991,10 +2979,12 @@ workflows:
       - caffe2_py2_gcc4_8_ubuntu14_04_test:
           requires:
             - caffe2_py2_gcc4_8_ubuntu14_04_build
+
       - caffe2_onnx_py2_gcc5_ubuntu16_04_build
       - caffe2_onnx_py2_gcc5_ubuntu16_04_test:
           requires:
             - caffe2_onnx_py2_gcc5_ubuntu16_04_build
+
       - caffe2_py2_cuda8_0_cudnn7_ubuntu16_04_build
       - caffe2_py2_clang3_8_ubuntu16_04_build
       - caffe2_py2_clang3_9_ubuntu16_04_build
index 4beb22d..b5efd64 100755 (executable)
@@ -2,6 +2,8 @@
 
 set -ex
 
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
 # TODO: Migrate all centos jobs to use proper devtoolset
 if [[ "$BUILD_ENVIRONMENT" == "py2-cuda9.0-cudnn7-centos7" ]]; then
   # There is a bug in the pango package on Centos7 that causes undefined
@@ -10,15 +12,20 @@ if [[ "$BUILD_ENVIRONMENT" == "py2-cuda9.0-cudnn7-centos7" ]]; then
   sudo yum install -y -q glib2-2.56.1
 fi
 
-pip install --user --no-cache-dir hypothesis==3.59.0
+# CMAKE_ARGS are only passed to 'cmake', and -Dfoo=bar flags do not work with
+# setup.py, so we build a list of foo=bar pairs and then either convert them
+# to -Dfoo=bar flags or export them as env vars before running setup.py
+build_args=()
+build_to_cmake () {
+  cmake_args=()
+  for build_arg in $*; do
+    cmake_args+=("-D$build_arg")
+  done
+  echo ${cmake_args[@]}
+}
+
 
-# The INSTALL_PREFIX here must match up with test.sh
-INSTALL_PREFIX="/usr/local/caffe2"
-LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
-CMAKE_ARGS=()
 SCCACHE="$(which sccache)"
-
 if [ "$(which gcc)" != "/root/sccache/gcc" ]; then
   # Setup SCCACHE
   ###############################################################################
@@ -95,48 +102,39 @@ report_compile_cache_stats() {
   fi
 }
 
-###############################################################################
-# Explicitly set Python executable.
-###############################################################################
-# On Ubuntu 16.04 the default Python is still 2.7.
-PYTHON="$(which python)"
-if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
-  PYTHON=$(which "python${BASH_REMATCH[1]}")
-  CMAKE_ARGS+=("-DPYTHON_EXECUTABLE=${PYTHON}")
-fi
-
 
 ###############################################################################
 # Use special scripts for Android and setup builds
 ###############################################################################
 if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
   export ANDROID_NDK=/opt/ndk
-  CMAKE_ARGS+=("-DBUILD_BINARY=ON")
-  CMAKE_ARGS+=("-DBUILD_TEST=ON")
-  CMAKE_ARGS+=("-DUSE_OBSERVERS=ON")
-  CMAKE_ARGS+=("-DUSE_ZSTD=ON")
-  "${ROOT_DIR}/scripts/build_android.sh" ${CMAKE_ARGS[*]} "$@"
+  build_args+=("BUILD_BINARY=ON")
+  build_args+=("BUILD_TEST=ON")
+  build_args+=("USE_OBSERVERS=ON")
+  build_args+=("USE_ZSTD=ON")
+  "${ROOT_DIR}/scripts/build_android.sh" $(build_to_cmake ${build_args[@]}) "$@"
   exit 0
 fi
 
-
 ###############################################################################
-# Set cmake args
+# Set parameters
 ###############################################################################
-CMAKE_ARGS+=("-DBUILD_BINARY=ON")
-CMAKE_ARGS+=("-DBUILD_TEST=ON")
-CMAKE_ARGS+=("-DINSTALL_TEST=ON")
-CMAKE_ARGS+=("-DUSE_OBSERVERS=ON")
-CMAKE_ARGS+=("-DUSE_ZSTD=ON")
-CMAKE_ARGS+=("-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}")
-
+if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
+  build_args+=("BUILD_PYTHON=OFF")
+else
+  build_args+=("BUILD_PYTHON=ON")
+  build_args+=("PYTHON_EXECUTABLE=${PYTHON}")
+fi
 if [[ $BUILD_ENVIRONMENT == *mkl* ]]; then
-  CMAKE_ARGS+=("-DBLAS=MKL")
-  CMAKE_ARGS+=("-DUSE_MKLDNN=ON")
+  build_args+=("BLAS=MKL")
+  build_args+=("USE_MKLDNN=ON")
 fi
+build_args+=("BUILD_BINARY=ON")
+build_args+=("BUILD_TEST=ON")
+build_args+=("INSTALL_TEST=ON")
+build_args+=("USE_ZSTD=ON")
 
 if [[ $BUILD_ENVIRONMENT == py2-cuda9.0-cudnn7-ubuntu16.04 ]]; then
-
   # removing http:// duplicate in favor of nvidia-ml.list
   # which is https:// version of the same repo
   sudo rm -f /etc/apt/sources.list.d/nvidia-machine-learning.list
@@ -147,16 +145,18 @@ if [[ $BUILD_ENVIRONMENT == py2-cuda9.0-cudnn7-ubuntu16.04 ]]; then
   sudo apt-get install libnvinfer5 libnvinfer-dev
   rm ./nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda9.0_1-1_amd64.deb
 
-  CMAKE_ARGS+=("-DUSE_TENSORRT=ON")
+  build_args+=("USE_TENSORRT=ON")
 fi
 
 if [[ $BUILD_ENVIRONMENT == *cuda* ]]; then
-  CMAKE_ARGS+=("-DUSE_CUDA=ON")
-  CMAKE_ARGS+=("-DCUDA_ARCH_NAME=Maxwell")
-  CMAKE_ARGS+=("-DUSE_NNPACK=OFF")
+  build_args+=("USE_CUDA=ON")
+  build_args+=("USE_NNPACK=OFF")
+
+  # Target only our CI GPU machine's CUDA arch to speed up the build
+  build_args+=("TORCH_CUDA_ARCH_LIST=Maxwell")
 
   # Explicitly set path to NVCC such that the symlink to ccache or sccache is used
-  CMAKE_ARGS+=("-DCUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/nvcc")
+  build_args+=("CUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/nvcc")
 
   # Ensure FindCUDA.cmake can infer the right path to the CUDA toolkit.
   # Setting PATH to resolve to the right nvcc alone isn't enough.
@@ -167,10 +167,16 @@ if [[ $BUILD_ENVIRONMENT == *cuda* ]]; then
   export PATH="/usr/local/cuda/bin:$PATH"
 fi
 if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
+  build_args+=("USE_ROCM=ON")
   # This is needed to enable ImageInput operator in resnet50_trainer
-  CMAKE_ARGS+=("-USE_OPENCV=ON")
+  build_args+=("USE_OPENCV=ON")
   # This is needed to read datasets from https://download.caffe2.ai/databases/resnet_trainer.zip
-  CMAKE_ARGS+=("-USE_LMDB=ON")
+  build_args+=("USE_LMDB=ON")
+  # When hcc runs out of memory, it silently exits without stopping
+  # the build process, leaving undefined symbols in the shared lib
+  # which will cause undefined symbol errors when later running
+  # tests. Setting MAX_JOBS to a smaller number makes CI less flaky.
+  export MAX_JOBS=4
 
   ########## HIPIFY Caffe2 operators
   ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_amd.py"
@@ -179,37 +185,25 @@ fi
 # building bundled nccl in this config triggers a bug in nvlink. For
 # more, see https://github.com/pytorch/pytorch/issues/14486
 if [[ "${BUILD_ENVIRONMENT}" == *-cuda8*-cudnn7* ]]; then
-    CMAKE_ARGS+=("-DUSE_SYSTEM_NCCL=ON")
+    build_args+=("USE_SYSTEM_NCCL=ON")
 fi
 
 # Try to include Redis support for Linux builds
 if [ "$(uname)" == "Linux" ]; then
-  CMAKE_ARGS+=("-DUSE_REDIS=ON")
-fi
-
-# Currently, on Jenkins mac os, we will use custom protobuf. Mac OS
-# contbuild at the moment is minimal dependency - it doesn't use glog
-# or gflags either.
-if [ "$(uname)" == "Darwin" ]; then
-  CMAKE_ARGS+=("-DBUILD_CUSTOM_PROTOBUF=ON")
+  build_args+=("USE_REDIS=ON")
 fi
 
 # Use a specialized onnx namespace in CI to catch hardcoded onnx namespace
-CMAKE_ARGS+=("-DONNX_NAMESPACE=ONNX_NAMESPACE_FOR_C2_CI")
-
-# We test the presence of cmake3 (for platforms like Centos and Ubuntu 14.04)
-# and use that if so.
-if [[ -x "$(command -v cmake3)" ]]; then
-    CMAKE_BINARY=cmake3
-else
-    CMAKE_BINARY=cmake
-fi
+build_args+=("ONNX_NAMESPACE=ONNX_NAMESPACE_FOR_C2_CI")
 
 ###############################################################################
 # Configure and make
 ###############################################################################
 
-if [[ -z "$INTEGRATED" ]]; then
+if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
+  # cmake-only build (no setup.py), used to test the cpp-only bits. This
+  # installs into /usr/local/caffe2 and does not install any Python tests
+  build_args+=("CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}")
 
   # Run cmake from ./build_caffe2 directory so it doesn't conflict with
   # standard PyTorch build directory. Eventually these won't need to
@@ -218,8 +212,16 @@ if [[ -z "$INTEGRATED" ]]; then
   mkdir build_caffe2
   cd ./build_caffe2
 
+  # We test the presence of cmake3 (for platforms like Centos and Ubuntu 14.04)
+  # and use that if so.
+  if [[ -x "$(command -v cmake3)" ]]; then
+      CMAKE_BINARY=cmake3
+  else
+      CMAKE_BINARY=cmake
+  fi
+
   # Configure
-  ${CMAKE_BINARY} "${ROOT_DIR}" ${CMAKE_ARGS[*]} "$@"
+  ${CMAKE_BINARY} "${ROOT_DIR}" $(build_to_cmake ${build_args[@]}) "$@"
 
   # Build
   if [ "$(uname)" == "Linux" ]; then
@@ -235,6 +237,18 @@ if [[ -z "$INTEGRATED" ]]; then
   ls $INSTALL_PREFIX
 
 else
+  # Python build. Uses setup.py to install into site-packages
+  build_args+=("USE_LEVELDB=ON")
+  build_args+=("USE_LMDB=ON")
+  build_args+=("USE_OPENCV=ON")
+  build_args+=("BUILD_TEST=ON")
+  # These flags preserve the flags that were used before this refactor (blame
+  # me)
+  build_args+=("USE_GLOG=ON")
+  build_args+=("USE_GFLAGS=ON")
+  build_args+=("USE_FBGEMM=OFF")
+  build_args+=("USE_MKLDNN=OFF")
+  build_args+=("USE_DISTRIBUTED=ON")
 
   # sccache will be stuck if all cores are used for compiling
   # see https://github.com/pytorch/pytorch/pull/7361
@@ -242,9 +256,15 @@ else
     export MAX_JOBS=`expr $(nproc) - 1`
   fi
 
-  USE_LEVELDB=1 USE_LMDB=1 USE_OPENCV=1 BUILD_TEST=1 BUILD_BINARY=1 python setup.py install --user
+  for build_arg in "${build_args[@]}"; do
+    export $build_arg
+  done
+  $PYTHON setup.py install --user
 
-  # This is to save test binaries for testing
+  # This is to save test binaries for testing. Copying caffe2/test to
+  # INSTALL_PREFIX (/usr/local/caffe2/) lets these setup.py builds share the
+  # cpp test code with the cmake-only build above. In test.sh the cpp tests
+  # are run from INSTALL_PREFIX
   cp -r torch/lib/tmp_install $INSTALL_PREFIX
   mkdir -p "$INSTALL_PREFIX/cpp_test/"
   cp -r caffe2/test/* "$INSTALL_PREFIX/cpp_test/"
@@ -262,38 +282,3 @@ fi
 pip install --user -b /tmp/pip_install_onnx "file://${ROOT_DIR}/third_party/onnx#egg=onnx"
 
 report_compile_cache_stats
-
-# Symlink the caffe2 base python path into the system python path,
-# so that we can import caffe2 without having to change $PYTHONPATH.
-# Run in a subshell to contain environment set by /etc/os-release.
-#
-# This is only done when running on Jenkins!  We don't want to pollute
-# the user environment with Python symlinks and ld.so.conf.d hacks.
-#
-if [[ -z "$INTEGRATED" ]]; then
-  if [ -n "${JENKINS_URL}" ]; then
-    (
-      source /etc/os-release
-
-      function python_version() {
-        "$PYTHON" -c 'import sys; print("python%d.%d" % sys.version_info[0:2])'
-      }
-
-      # Debian/Ubuntu
-      if [[ "$ID_LIKE" == *debian* ]]; then
-        python_path="/usr/local/lib/$(python_version)/dist-packages"
-        sudo ln -sf "${INSTALL_PREFIX}/caffe2" "${python_path}"
-      fi
-
-      # RHEL/CentOS
-      if [[ "$ID_LIKE" == *rhel* ]]; then
-        python_path="/usr/lib64/$(python_version)/site-packages/"
-        sudo ln -sf "${INSTALL_PREFIX}/caffe2" "${python_path}"
-      fi
-
-      # /etc/ld.so.conf.d is used on both Debian and RHEL
-      echo "${INSTALL_PREFIX}/lib" | sudo tee /etc/ld.so.conf.d/caffe2.conf
-      sudo ldconfig
-    )
-  fi
-fi
index 32f8674..7704c11 100644 (file)
@@ -2,21 +2,17 @@ set -ex
 
 LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
+TEST_DIR="$ROOT_DIR/caffe2_tests"
+gtest_reports_dir="${TEST_DIR}/cpp"
+pytest_reports_dir="${TEST_DIR}/python"
 
 # Figure out which Python to use
-PYTHON="python"
+PYTHON="$(which python)"
 if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
-  PYTHON="python${BASH_REMATCH[1]}"
+  PYTHON=$(which "python${BASH_REMATCH[1]}")
 fi
 
-# Find where Caffe2 is installed. This will be the absolute path to the
-# site-packages of the active Python installation
+# /usr/local/caffe2 is where the cpp bits are installed to in cmake-only
+# builds. In +python builds the cpp tests are copied to /usr/local/caffe2 so
+# that the test code in .jenkins/test.sh is the same
 INSTALL_PREFIX="/usr/local/caffe2"
-SITE_DIR=$($PYTHON -c "from distutils import sysconfig; print(sysconfig.get_python_lib(prefix=''))")
-INSTALL_SITE_DIR="${INSTALL_PREFIX}/${SITE_DIR}"
-CAFFE2_PYPATH="$INSTALL_SITE_DIR/caffe2"
-
-# Set PYTHONPATH and LD_LIBRARY_PATH so that python can find the installed
-# Caffe2.
-export PYTHONPATH="${PYTHONPATH}:$INSTALL_SITE_DIR"
-export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${INSTALL_PREFIX}/lib"
index 623639b..6800393 100755 (executable)
@@ -8,9 +8,6 @@ if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
   exit 0
 fi
 
-cd "$ROOT_DIR"
-
-TEST_DIR="$ROOT_DIR/caffe2_tests"
 rm -rf "$TEST_DIR" && mkdir -p "$TEST_DIR"
 
 cd "${WORKSPACE}"
@@ -20,7 +17,7 @@ cd "${WORKSPACE}"
 #############
 
 echo "Running C++ tests.."
-gtest_reports_dir="${TEST_DIR}/cpp"
+export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${INSTALL_PREFIX}/lib"
 mkdir -p "$gtest_reports_dir"
 for test in $(find "${INSTALL_PREFIX}/cpp_test" -executable -type f); do
   case "$test" in
@@ -51,11 +48,60 @@ for test in $(find "${INSTALL_PREFIX}/cpp_test" -executable -type f); do
   esac
 done
 
-################
+################################################################################
 # Python tests #
-################
+################################################################################
+if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
+  exit 0
+fi
+
+# Ideally this would be where the Python bits get installed to when using
+# setup.py. However, on our dockers this is not correct for two reasons.
+# 1. This lies in /usr/local/lib/pythonM.m, but the dockers don't have the
+#    right permissions set up, so the build doesn't have write access to this
+#    dir. For this reason we use the --user flag in all pip install
+#    invocations, which installs into the $HOME/.local directory instead.
+# 2. This returns lib/pythonM.m/dist-packages, but we install into site-packages.
+# We use this same way of getting the install directory in other places in our
+# build, so it is not really clear why it is not correct here
+INSTALL_SITE_DIR=$($PYTHON -c "from distutils import sysconfig; print(sysconfig.get_python_lib())")
+if [[ -n "$(find $INSTALL_SITE_DIR -name caffe2 2>/dev/null)" ]]; then
+  # Caffe2 will probably be found here if using a Python from a virtualenv or
+  # from conda
+  CAFFE2_PYPATH="$INSTALL_SITE_DIR/caffe2"
+elif [[ -n "$(find $HOME/.local/lib -name caffe2 2>/dev/null)" ]]; then
+  # Caffe2 will be found here when no env is used and --user is passed to
+  # the setup.py call
+  pyver=($(python --version 2>&1))
+  pyver=${pyver[1]}
+  pyver=${pyver:0:3}
+  CAFFE2_PYPATH="$HOME/.local/lib/python$pyver/site-packages/caffe2"
+else
+  echo "I do not know where Caffe2 is installed"
+  find / -name caffe2 2>/dev/null
+  exit 1
+fi
+if [[ ! -d "$CAFFE2_PYPATH" ]]; then
+  echo "Failed to find where Caffe2 Python bits are installed"
+  find / -name caffe2 2>/dev/null
+  exit 1
+fi
+
+
+if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
+  # Hotfix, use hypothesis 3.44.6 on Ubuntu 14.04
+  # See comments on
+  # https://github.com/HypothesisWorks/hypothesis-python/commit/eadd62e467d6cee6216e71b391951ec25b4f5830
+  sudo pip -q uninstall -y hypothesis
+  # "pip install hypothesis==3.44.6" from official server is unreliable on
+  # CircleCI, so we host a copy on S3 instead
+  sudo pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
+  sudo pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
+  sudo pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
+else
+  pip install --user --no-cache-dir hypothesis==3.59.0
+fi
 
-pytest_reports_dir="${TEST_DIR}/python"
 mkdir -p "$pytest_reports_dir"
 
 # Collect additional tests to run (outside caffe2/python)
@@ -75,6 +121,10 @@ if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then
   rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/piecewise_linear_transform_test.py")
   rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/softmax_ops_test.py")
   rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/unique_ops_test.py")
+
+  # On ROCm, RCCL (distributed) development isn't complete.
+  # https://github.com/ROCmSoftwarePlatform/rccl
+  rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/data_parallel_model_test.py")
 fi
 
 # NB: Warnings are disabled because they make it harder to see what
@@ -95,9 +145,13 @@ pip install --user pytest-sugar
   "$CAFFE2_PYPATH/python" \
   "${EXTRA_TESTS[@]}"
 
+#####################
+# torchvision tests #
+#####################
+
 cd ${INSTALL_PREFIX}
 
-if [[ -n "$INTEGRATED" ]]; then
+if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
   pip install --user torchvision
   "$ROOT_DIR/scripts/onnx/test.sh"
 fi
index 052e2f7..6be3fe0 100644 (file)
@@ -11,6 +11,7 @@ import caffe2.python.serialized_test.serialized_test_util as serial
 import numpy as np
 import os
 import unittest
+import torch
 
 
 class TestLayerNormOp(serial.SerializedTestCase):
index b302d19..84f73d4 100644 (file)
@@ -751,17 +751,14 @@ if(USE_ROCM)
   else()
     caffe2_update_option(USE_ROCM OFF)
   endif()
-endif()
 
-# ---[ ROCm
-if(USE_ROCM)
- include_directories(SYSTEM ${HIP_PATH}/include)
- include_directories(SYSTEM ${ROCBLAS_PATH}/include)
- include_directories(SYSTEM ${ROCFFT_PATH}/include)
- include_directories(SYSTEM ${HIPSPARSE_PATH}/include)
- include_directories(SYSTEM ${HIPRAND_PATH}/include)
- include_directories(SYSTEM ${ROCRAND_PATH}/include)
- include_directories(SYSTEM ${THRUST_PATH})
+  include_directories(SYSTEM ${HIP_PATH}/include)
+  include_directories(SYSTEM ${ROCBLAS_PATH}/include)
+  include_directories(SYSTEM ${ROCFFT_PATH}/include)
+  include_directories(SYSTEM ${HIPSPARSE_PATH}/include)
+  include_directories(SYSTEM ${HIPRAND_PATH}/include)
+  include_directories(SYSTEM ${ROCRAND_PATH}/include)
+  include_directories(SYSTEM ${THRUST_PATH})
 endif()
 
 # ---[ NCCL
index c928f97..22afe0a 100755 (executable)
@@ -35,8 +35,14 @@ test_paths=(
     "$top_dir/test/onnx"
 )
 
+args=()
+args+=("-v")
 if [[ $PARALLEL == 1 ]]; then
-    pytest -n 3 "${test_paths[@]}"
-else
-    pytest "${test_paths[@]}"
+  args+=("-n")
+  args+=("3")
 fi
+
+pytest "${args[@]}" \
+  -k \
+  'not (TestOperators and test_full_like) and not (TestOperators and test_zeros_like) and not (TestOperators and test_ones_like) and not (TestModels and test_super_resolution) and not (TestModels and test_vgg16) and not (TestModels and test_vgg16_bn) and not (TestModels and test_vgg19) and not (TestModels and test_vgg19_bn)' \
+  "${test_paths[@]}"
index 8a4cac4..4fd8dca 100644 (file)
--- a/setup.py
+++ b/setup.py
 #     specify a namespace for ONNX built here rather than the hard-coded
 #     one in this file; needed to build with other frameworks that share ONNX.
 #
+#   BLAS
+#     BLAS to be used by Caffe2. Can be MKL, Eigen, ATLAS, or OpenBLAS. If set,
+#     the build will fail if the requested BLAS is not found; otherwise,
+#     the BLAS will be chosen based on what is found on your system.
+#
+#   USE_FBGEMM
+#     Enables use of FBGEMM
+#
+#   USE_REDIS
+#     Whether to use Redis for distributed workflows (Linux only)
+#
+#   USE_ZSTD
+#     Enables use of ZSTD, if the libraries are found
+#
 # Environment variables we respect (these environment variables are
 # conventional and are often understood/set by other software.)
 #
 #     specify a different compiler than the system one to use as the CUDA
 #     host compiler for nvcc.
 #
+#   CUDA_NVCC_EXECUTABLE
+#     Specify an nvcc executable to use. This is used in our CI to point to a cached nvcc
+#
 #   CUDNN_LIB_DIR
 #   CUDNN_INCLUDE_DIR
 #   CUDNN_LIBRARY
index 4625a50..f3cbc44 100644 (file)
@@ -12,7 +12,7 @@ from model_defs.srresnet import SRResNet
 from model_defs.dcgan import _netD, _netG, weights_init, bsz, imgsz, nz
 from model_defs.op_test import DummyNet, ConcatNet, PermuteNet, PReluNet
 
-from test_pytorch_common import TestCase, run_tests, skipIfNoLapack, skipIfCI
+from test_pytorch_common import TestCase, run_tests, skipIfNoLapack
 
 import torch
 import torch.onnx
@@ -77,7 +77,6 @@ class TestModels(TestCase):
         x = Variable(torch.randn(1, 3, 224, 224).fill_(1.0))
         self.exportTest(toC(SRResNet(rescale_factor=4, n_filters=64, n_blocks=8)), toC(x))
 
-    @skipIfCI
     @skipIfNoLapack
     def test_super_resolution(self):
         x = Variable(
@@ -96,25 +95,21 @@ class TestModels(TestCase):
         x = Variable(torch.randn(BATCH_SIZE, 1, 28, 28).fill_(1.0))
         self.exportTest(toC(MNIST()), toC(x))
 
-    @skipIfCI
     def test_vgg16(self):
         # VGG 16-layer model (configuration "D")
         x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
         self.exportTest(toC(vgg16()), toC(x))
 
-    @skipIfCI
     def test_vgg16_bn(self):
         # VGG 16-layer model (configuration "D") with batch normalization
         x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
         self.exportTest(toC(vgg16_bn()), toC(x))
 
-    @skipIfCI
     def test_vgg19(self):
         # VGG 19-layer model (configuration "E")
         x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
         self.exportTest(toC(vgg19()), toC(x))
 
-    @skipIfCI
     def test_vgg19_bn(self):
         # VGG 19-layer model (configuration 'E') with batch normalization
         x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
index 8484f9e..af53a3e 100644 (file)
@@ -17,8 +17,6 @@ import shutil
 import sys
 import common_utils as common
 
-from test_pytorch_common import skipIfCI
-
 
 '''Usage: python test/onnx/test_operators.py [--no-onnx] [--produce-onnx-test-data]
           --no-onnx: no onnx python dependence
@@ -300,7 +298,6 @@ class TestOperators(TestCase):
         x = torch.randn(3, 4, requires_grad=True)
         self.assertONNX(lambda x: torch.full(x.shape, 2), x)
 
-    @skipIfCI
     def test_full_like(self):
         x = torch.randn(3, 4, requires_grad=True)
         self.assertONNX(lambda x: torch.full_like(x, 2), x)
@@ -488,12 +485,10 @@ class TestOperators(TestCase):
         x = torch.randn(3, 4)
         self.assertONNX(torch.nn.Linear(4, 5, bias=True), x)
 
-    @skipIfCI
     def test_zeros_like(self):
         x = torch.randn(5, 8, requires_grad=True)
         self.assertONNX(lambda x: torch.zeros_like(x), x)
 
-    @skipIfCI
     def test_ones_like(self):
         x = torch.randn(6, 10, requires_grad=True)
         self.assertONNX(lambda x: torch.ones_like(x), x)
index 304f9c4..ce61b5c 100644 (file)
@@ -35,10 +35,6 @@ skipIfNoCuda = _skipper(lambda: not torch.cuda.is_available(),
 skipIfTravis = _skipper(lambda: os.getenv('TRAVIS'),
                         'Skip In Travis')
 
-skipIfCI = _skipper(lambda: os.getenv('CI') or os.getenv('TRAVIS') or
-                    os.getenv('JENKINS_URL') or os.getenv('INTEGRATED'),
-                    'Skip In CI')
-
 
 def flatten(x):
     return tuple(function._iter_filter(lambda o: isinstance(o, torch.Tensor))(x))
index 82ca8aa..4a550ff 100755 (executable)
@@ -204,6 +204,21 @@ function build_caffe2() {
   if [[ -n $CMAKE_PREFIX_PATH ]]; then
     EXTRA_CAFFE2_CMAKE_FLAGS+=("-DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH")
   fi
+  if [[ -n $BLAS ]]; then
+    EXTRA_CAFFE2_CMAKE_FLAGS+=("-DBLAS=$BLAS")
+  fi
+  if [[ -n $CUDA_NVCC_EXECUTABLE ]]; then
+    EXTRA_CAFFE2_CMAKE_FLAGS+=("-DCUDA_NVCC_EXECUTABLE=$CUDA_NVCC_EXECUTABLE")
+  fi
+  if [[ -n $USE_REDIS ]]; then
+    EXTRA_CAFFE2_CMAKE_FLAGS+=("-DUSE_REDIS=$USE_REDIS")
+  fi
+  if [[ -n $USE_GLOG ]]; then
+    EXTRA_CAFFE2_CMAKE_FLAGS+=("-DUSE_GLOG=$USE_GLOG")
+  fi
+  if [[ -n $USE_GFLAGS ]]; then
+    EXTRA_CAFFE2_CMAKE_FLAGS+=("-DUSE_GFLAGS=$USE_GFLAGS")
+  fi
 
   if [[ $RERUN_CMAKE -eq 1 ]] || [ ! -f CMakeCache.txt ]; then
       ${CMAKE_COMMAND} $BASE_DIR \
@@ -240,8 +255,6 @@ function build_caffe2() {
                       -DUSE_QNNPACK=$USE_QNNPACK \
                       -DUSE_TENSORRT=$USE_TENSORRT \
                       -DUSE_FFMPEG=$USE_FFMPEG \
-                      -DUSE_GLOG=OFF \
-                      -DUSE_GFLAGS=OFF \
                       -DUSE_SYSTEM_EIGEN_INSTALL=OFF \
                       -DCUDNN_INCLUDE_DIR=$CUDNN_INCLUDE_DIR \
                       -DCUDNN_LIB_DIR=$CUDNN_LIB_DIR \
index 252da64..e7f1d9b 100644 (file)
@@ -1,11 +1,19 @@
+import os
 from .env import check_env_flag, check_negative_env_flag
 
+BLAS = os.getenv('BLAS')
 BUILD_BINARY = check_env_flag('BUILD_BINARY')
 BUILD_TEST = not check_negative_env_flag('BUILD_TEST')
 BUILD_CAFFE2_OPS = not check_negative_env_flag('BUILD_CAFFE2_OPS')
 USE_LEVELDB = check_env_flag('USE_LEVELDB')
 USE_LMDB = check_env_flag('USE_LMDB')
 USE_OPENCV = check_env_flag('USE_OPENCV')
+USE_REDIS = check_env_flag('USE_REDIS')
 USE_TENSORRT = check_env_flag('USE_TENSORRT')
 USE_FFMPEG = check_env_flag('USE_FFMPEG')
 USE_FBGEMM = not (check_env_flag('NO_FBGEMM') or check_negative_env_flag('USE_FBGEMM'))
+USE_ZSTD = check_env_flag('USE_ZSTD')
+
+# These aren't in ./cuda.py because they need to be passed directly to cmake,
+# since cmake files expect them
+CUDA_NVCC_EXECUTABLE = os.getenv('CUDA_NVCC_EXECUTABLE')
index 480000f..d7affe5 100644 (file)
@@ -8,9 +8,9 @@ from .env import (IS_ARM, IS_DARWIN, IS_LINUX, IS_PPC, IS_WINDOWS,
 
 hotpatch_build_env_vars()
 
-from .build import (BUILD_BINARY, BUILD_CAFFE2_OPS, BUILD_TEST, USE_FBGEMM,
-                    USE_FFMPEG, USE_LEVELDB, USE_LMDB, USE_OPENCV,
-                    USE_TENSORRT)
+from .build import (BLAS, BUILD_BINARY, BUILD_CAFFE2_OPS, BUILD_TEST,
+                    USE_FBGEMM, USE_FFMPEG, USE_LEVELDB, USE_LMDB, USE_OPENCV,
+                    USE_REDIS, USE_TENSORRT, USE_ZSTD, CUDA_NVCC_EXECUTABLE)
 from .cuda import CUDA_HOME, CUDA_VERSION, USE_CUDA
 from .cudnn import CUDNN_INCLUDE_DIR, CUDNN_LIB_DIR, CUDNN_LIBRARY, USE_CUDNN
 from .dist_check import USE_DISTRIBUTED, USE_GLOO_IBVERBS
@@ -66,6 +66,8 @@ def get_common_env_with_flags():
     my_env = os.environ.copy()
     my_env["PYTORCH_PYTHON"] = sys.executable
     my_env["ONNX_NAMESPACE"] = ONNX_NAMESPACE
+    if BLAS:
+        my_env["BLAS"] = BLAS
     if USE_SYSTEM_NCCL:
         my_env["NCCL_ROOT_DIR"] = NCCL_ROOT_DIR
         my_env["NCCL_INCLUDE_DIR"] = NCCL_INCLUDE_DIR
@@ -75,6 +77,8 @@ def get_common_env_with_flags():
         extra_flags += ['--use-cuda']
         if IS_WINDOWS:
             my_env["NVTOOLEXT_HOME"] = NVTOOLEXT_HOME
+        if CUDA_NVCC_EXECUTABLE:
+            my_env["CUDA_NVCC_EXECUTABLE"] = CUDA_NVCC_EXECUTABLE
     if USE_CUDA_STATIC_LINK:
         extra_flags += ['--cuda-static-link']
     if USE_FBGEMM: