Switch our Linux machine AMI to a newer image. (#18433)
author Edward Yang <ezyang@fb.com>
Wed, 3 Apr 2019 20:38:56 +0000 (13:38 -0700)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Wed, 3 Apr 2019 20:50:37 +0000 (13:50 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/18433
ghimport-source-id: 1c92f98b091232c0045a2e1db75d19c1f258ac1f

Differential Revision: D14748827

Pulled By: ezyang

fbshipit-source-id: a459451058cf5560811403bafb96c6ff083d7e3a

.circleci/config.yml
.circleci/verbatim-sources/header-section.yml
.circleci/verbatim-sources/job-specs-custom.yml
.circleci/verbatim-sources/job-specs-html-update.yml
.circleci/verbatim-sources/linux-binary-build-defaults.yml
.circleci/verbatim-sources/linux-build-defaults.yml
.circleci/verbatim-sources/nightly-build-smoke-tests-defaults.yml

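diff --git a/.circleci/config.yml b/.circleci/config.yml
index db6d274..46db2a7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml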
@@ -27,26 +27,37 @@ setup_linux_system_environment: &setup_linux_system_environment
     # Set up CircleCI GPG keys for apt, if needed
     curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add -
 
-# NOTE: We only perform the merge in build step and not in test step, because
-# all source files will be shared from build to test
-install_official_git_client: &install_official_git_client
-  name: Install Official Git Client
-  no_output_timeout: "1h"
-  command: |
-    set -ex
-
-    sudo killall apt-get || true
-    sudo rm /var/lib/apt/lists/lock || true
-    sudo rm /var/cache/apt/archives/lock || true
-    sudo rm /var/lib/dpkg/lock || true
+    # Stop background apt updates.  Hypothetically, the kill should not
+    # be necessary, because stop is supposed to send a kill signal to
+    # the process, but we've added it for good luck.  Also
+    # hypothetically, it's supposed to be unnecessary to wait for
+    # the process to block.  We also have that line for good luck.
+    # If you like, try deleting them and seeing if it works.
+    sudo systemctl stop apt-daily.service || true
+    sudo systemctl kill --kill-who=all apt-daily.service || true
+
+    sudo systemctl stop unattended-upgrades.service || true
+    sudo systemctl kill --kill-who=all unattended-upgrades.service || true
+
+    # wait until `apt-get update` has been killed
+    while systemctl is-active --quiet apt-daily.service
+    do
+      sleep 1;
+    done
+    while systemctl is-active --quiet unattended-upgrades.service
+    do
+      sleep 1;
+    done
+
+    # See if we actually were successful
+    systemctl list-units --all | cat
+
+    sudo apt-get purge -y unattended-upgrades
 
     cat /etc/apt/sources.list
-    sudo sed -i 's#archive.ubuntu.com/ubuntu#us-east-1.ec2.archive.ubuntu.com/ubuntu#g' /etc/apt/sources.list
-    sudo sed -i 's#security.ubuntu.com/ubuntu#us-east-1.ec2.archive.ubuntu.com/ubuntu#g' /etc/apt/sources.list
-    cat /etc/apt/sources.list
 
-    sudo apt-get -q -y update
-    sudo apt-get -q -y install openssh-client git
+    ps ax | grep apt
+    ps ax | grep dpkg
 
 install_doc_push_script: &install_doc_push_script
   name: Install the doc push script
@@ -189,24 +200,30 @@ setup_ci_environment: &setup_ci_environment
 
     # Set up NVIDIA docker repo
     curl -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
-    echo "deb https://nvidia.github.io/libnvidia-container/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
-    echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
-    echo "deb https://nvidia.github.io/nvidia-docker/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
+    echo "deb https://nvidia.github.io/libnvidia-container/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
+    echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
+    echo "deb https://nvidia.github.io/nvidia-docker/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
 
-    sudo apt-get -q -y update
-    sudo apt-get -q -y remove linux-image-generic linux-headers-generic linux-generic docker-ce
+    sudo apt-get -y update
+    sudo apt-get -y remove linux-image-generic linux-headers-generic linux-generic docker-ce
     # WARNING: Docker version is hardcoded here; you must update the
     # version number below for docker-ce and nvidia-docker2 to get newer
     # versions of Docker.  We hardcode these numbers because we kept
     # getting broken CI when Docker would update their docker version,
     # and nvidia-docker2 would be out of date for a day until they
     # released a newer version of their package.
-    sudo apt-get -q -y install \
+    #
+    # How to figure out what the correct versions of these packages are?
+    # My preferred method is to start a Docker instance of the correct
+    # Ubuntu version (e.g., docker run -it ubuntu:16.04) and then ask
+    # apt what the packages you need are.  Note that the CircleCI image
+    # comes with Docker.
+    sudo apt-get -y install \
       linux-headers-$(uname -r) \
       linux-image-generic \
       moreutils \
-      docker-ce=18.06.2~ce~3-0~ubuntu \
-      nvidia-docker2=2.0.3+docker18.06.2-1 \
+      docker-ce=5:18.09.4~3-0~ubuntu-xenial \
+      nvidia-docker2=2.0.3+docker18.09.4-1 \
       expect-dev
 
     sudo pkill -SIGHUP dockerd
@@ -214,8 +231,9 @@ setup_ci_environment: &setup_ci_environment
     sudo pip -q install awscli==1.16.35
 
     if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
-      wget 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-410.79.run'
-      sudo /bin/bash ./NVIDIA-Linux-x86_64-410.79.run -s --no-drm
+      DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run"
+      wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
+      sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
       nvidia-smi
     fi
 
@@ -273,12 +291,10 @@ macos_brew_update: &macos_brew_update
 pytorch_linux_build_defaults: &pytorch_linux_build_defaults
   resource_class: large
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
-  - run:
-      <<: *install_official_git_client
   - checkout
   - run:
       <<: *setup_ci_environment
@@ -308,7 +324,7 @@ pytorch_linux_build_defaults: &pytorch_linux_build_defaults
 
 pytorch_linux_test_defaults: &pytorch_linux_test_defaults
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
@@ -337,12 +353,10 @@ pytorch_linux_test_defaults: &pytorch_linux_test_defaults
 caffe2_linux_build_defaults: &caffe2_linux_build_defaults
   resource_class: large
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
-  - run:
-      <<: *install_official_git_client
   - checkout
   - run:
       <<: *setup_ci_environment
@@ -398,7 +412,7 @@ caffe2_linux_build_defaults: &caffe2_linux_build_defaults
 
 caffe2_linux_test_defaults: &caffe2_linux_test_defaults
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
@@ -817,7 +831,7 @@ binary_linux_build: &binary_linux_build
 # that on the docker executor)
 binary_linux_test: &binary_linux_test
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
@@ -883,7 +897,7 @@ binary_linux_test: &binary_linux_test
 
 binary_linux_upload: &binary_linux_upload
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
@@ -1068,13 +1082,11 @@ binary_mac_upload: &binary_mac_upload
 ##############################################################################
 smoke_linux_test: &smoke_linux_test
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
   - run:
-      <<: *install_official_git_client
-  - run:
       <<: *setup_ci_environment
   - run:
       <<: *binary_populate_env
@@ -1360,7 +1372,7 @@ jobs:
       USE_CUDA_DOCKER_RUNTIME: "1"
     resource_class: gpu.medium
     machine:
-      image: default
+      image: ubuntu-1604:201903-01
     steps:
     - run:
         <<: *setup_linux_system_environment
@@ -1391,7 +1403,7 @@ jobs:
       DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda8-cudnn7-py3:291"
     resource_class: large
     machine:
-      image: default
+      image: ubuntu-1604:201903-01
     steps:
     - run:
         <<: *setup_linux_system_environment
@@ -1705,7 +1717,7 @@ jobs:
 # update_s3_htmls job
   update_s3_htmls:
     machine:
-      image: default
+      image: ubuntu-1604:201903-01
     steps:
     - run:
         <<: *setup_linux_system_environment
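diff --git a/.circleci/verbatim-sources/header-section.yml b/.circleci/verbatim-sources/header-section.yml
index 5e691ef..30ed060 100644
--- a/.circleci/verbatim-sources/header-section.yml
+++ b/.circleci/verbatim-sources/header-section.yml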
@@ -27,26 +27,37 @@ setup_linux_system_environment: &setup_linux_system_environment
     # Set up CircleCI GPG keys for apt, if needed
     curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add -
 
-# NOTE: We only perform the merge in build step and not in test step, because
-# all source files will be shared from build to test
-install_official_git_client: &install_official_git_client
-  name: Install Official Git Client
-  no_output_timeout: "1h"
-  command: |
-    set -ex
+    # Stop background apt updates.  Hypothetically, the kill should not
+    # be necessary, because stop is supposed to send a kill signal to
+    # the process, but we've added it for good luck.  Also
+    # hypothetically, it's supposed to be unnecessary to wait for
+    # the process to block.  We also have that line for good luck.
+    # If you like, try deleting them and seeing if it works.
+    sudo systemctl stop apt-daily.service || true
+    sudo systemctl kill --kill-who=all apt-daily.service || true
+
+    sudo systemctl stop unattended-upgrades.service || true
+    sudo systemctl kill --kill-who=all unattended-upgrades.service || true
+
+    # wait until `apt-get update` has been killed
+    while systemctl is-active --quiet apt-daily.service
+    do
+      sleep 1;
+    done
+    while systemctl is-active --quiet unattended-upgrades.service
+    do
+      sleep 1;
+    done
+
+    # See if we actually were successful
+    systemctl list-units --all | cat
+
+    sudo apt-get purge -y unattended-upgrades
 
-    sudo killall apt-get || true
-    sudo rm /var/lib/apt/lists/lock || true
-    sudo rm /var/cache/apt/archives/lock || true
-    sudo rm /var/lib/dpkg/lock || true
-
-    cat /etc/apt/sources.list
-    sudo sed -i 's#archive.ubuntu.com/ubuntu#us-east-1.ec2.archive.ubuntu.com/ubuntu#g' /etc/apt/sources.list
-    sudo sed -i 's#security.ubuntu.com/ubuntu#us-east-1.ec2.archive.ubuntu.com/ubuntu#g' /etc/apt/sources.list
     cat /etc/apt/sources.list
 
-    sudo apt-get -q -y update
-    sudo apt-get -q -y install openssh-client git
+    ps ax | grep apt
+    ps ax | grep dpkg
 
 install_doc_push_script: &install_doc_push_script
   name: Install the doc push script
@@ -189,24 +200,30 @@ setup_ci_environment: &setup_ci_environment
 
     # Set up NVIDIA docker repo
     curl -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
-    echo "deb https://nvidia.github.io/libnvidia-container/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
-    echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
-    echo "deb https://nvidia.github.io/nvidia-docker/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
+    echo "deb https://nvidia.github.io/libnvidia-container/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
+    echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
+    echo "deb https://nvidia.github.io/nvidia-docker/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
 
-    sudo apt-get -q -y update
-    sudo apt-get -q -y remove linux-image-generic linux-headers-generic linux-generic docker-ce
+    sudo apt-get -y update
+    sudo apt-get -y remove linux-image-generic linux-headers-generic linux-generic docker-ce
     # WARNING: Docker version is hardcoded here; you must update the
     # version number below for docker-ce and nvidia-docker2 to get newer
     # versions of Docker.  We hardcode these numbers because we kept
     # getting broken CI when Docker would update their docker version,
     # and nvidia-docker2 would be out of date for a day until they
     # released a newer version of their package.
-    sudo apt-get -q -y install \
+    #
+    # How to figure out what the correct versions of these packages are?
+    # My preferred method is to start a Docker instance of the correct
+    # Ubuntu version (e.g., docker run -it ubuntu:16.04) and then ask
+    # apt what the packages you need are.  Note that the CircleCI image
+    # comes with Docker.
+    sudo apt-get -y install \
       linux-headers-$(uname -r) \
       linux-image-generic \
       moreutils \
-      docker-ce=18.06.2~ce~3-0~ubuntu \
-      nvidia-docker2=2.0.3+docker18.06.2-1 \
+      docker-ce=5:18.09.4~3-0~ubuntu-xenial \
+      nvidia-docker2=2.0.3+docker18.09.4-1 \
       expect-dev
 
     sudo pkill -SIGHUP dockerd
@@ -214,8 +231,9 @@ setup_ci_environment: &setup_ci_environment
     sudo pip -q install awscli==1.16.35
 
     if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
-      wget 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-410.79.run'
-      sudo /bin/bash ./NVIDIA-Linux-x86_64-410.79.run -s --no-drm
+      DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run"
+      wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
+      sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
       nvidia-smi
     fi
 
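diff --git a/.circleci/verbatim-sources/job-specs-custom.yml b/.circleci/verbatim-sources/job-specs-custom.yml
index 3d70858..6892c45 100644
--- a/.circleci/verbatim-sources/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs-custom.yml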
@@ -6,7 +6,7 @@
       USE_CUDA_DOCKER_RUNTIME: "1"
     resource_class: gpu.medium
     machine:
-      image: default
+      image: ubuntu-1604:201903-01
     steps:
     - run:
         <<: *setup_linux_system_environment
@@ -37,7 +37,7 @@
       DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda8-cudnn7-py3:291"
     resource_class: large
     machine:
-      image: default
+      image: ubuntu-1604:201903-01
     steps:
     - run:
         <<: *setup_linux_system_environment
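diff --git a/.circleci/verbatim-sources/job-specs-html-update.yml b/.circleci/verbatim-sources/job-specs-html-update.yml
index 2a28898..39e760a 100644
--- a/.circleci/verbatim-sources/job-specs-html-update.yml
+++ b/.circleci/verbatim-sources/job-specs-html-update.yml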
@@ -1,7 +1,7 @@
 # update_s3_htmls job
   update_s3_htmls:
     machine:
-      image: default
+      image: ubuntu-1604:201903-01
     steps:
     - run:
         <<: *setup_linux_system_environment
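diff --git a/.circleci/verbatim-sources/linux-binary-build-defaults.yml b/.circleci/verbatim-sources/linux-binary-build-defaults.yml
index 7f5f002..581a9c1 100644
--- a/.circleci/verbatim-sources/linux-binary-build-defaults.yml
+++ b/.circleci/verbatim-sources/linux-binary-build-defaults.yml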
@@ -60,7 +60,7 @@ binary_linux_build: &binary_linux_build
 # that on the docker executor)
 binary_linux_test: &binary_linux_test
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
@@ -126,7 +126,7 @@ binary_linux_test: &binary_linux_test
 
 binary_linux_upload: &binary_linux_upload
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
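diff --git a/.circleci/verbatim-sources/linux-build-defaults.yml b/.circleci/verbatim-sources/linux-build-defaults.yml
index 953cb55..0156171 100644
--- a/.circleci/verbatim-sources/linux-build-defaults.yml
+++ b/.circleci/verbatim-sources/linux-build-defaults.yml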
@@ -7,12 +7,10 @@
 pytorch_linux_build_defaults: &pytorch_linux_build_defaults
   resource_class: large
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
-  - run:
-      <<: *install_official_git_client
   - checkout
   - run:
       <<: *setup_ci_environment
@@ -42,7 +40,7 @@ pytorch_linux_build_defaults: &pytorch_linux_build_defaults
 
 pytorch_linux_test_defaults: &pytorch_linux_test_defaults
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
@@ -71,12 +69,10 @@ pytorch_linux_test_defaults: &pytorch_linux_test_defaults
 caffe2_linux_build_defaults: &caffe2_linux_build_defaults
   resource_class: large
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
-  - run:
-      <<: *install_official_git_client
   - checkout
   - run:
       <<: *setup_ci_environment
@@ -132,7 +128,7 @@ caffe2_linux_build_defaults: &caffe2_linux_build_defaults
 
 caffe2_linux_test_defaults: &caffe2_linux_test_defaults
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
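diff --git a/.circleci/verbatim-sources/nightly-build-smoke-tests-defaults.yml b/.circleci/verbatim-sources/nightly-build-smoke-tests-defaults.yml
index f2235f7..f6fa4d9 100644
--- a/.circleci/verbatim-sources/nightly-build-smoke-tests-defaults.yml
+++ b/.circleci/verbatim-sources/nightly-build-smoke-tests-defaults.yml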
@@ -5,13 +5,11 @@
 ##############################################################################
 smoke_linux_test: &smoke_linux_test
   machine:
-    image: default
+    image: ubuntu-1604:201903-01
   steps:
   - run:
       <<: *setup_linux_system_environment
   - run:
-      <<: *install_official_git_client
-  - run:
       <<: *setup_ci_environment
   - run:
       <<: *binary_populate_env