ci: Improve interrupt signal handling in crosvm-runner.sh
author: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Thu, 3 Mar 2022 23:59:55 +0000 (01:59 +0200)
committer: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Tue, 8 Mar 2022 10:52:54 +0000 (12:52 +0200)
Run crosvm as a background process in order to allow intercepting
interrupt signals (INT, TERM) and properly release/clean up any allocated
resources.

This is particularly helpful when one or more crosvm tasks hang, which
will eventually prevent subsequent instances from being started; currently
we can handle up to 128 concurrent crosvm instances per runner.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15238>

.gitlab-ci/crosvm-runner.sh

index 8697b52..743afc7 100755 (executable)
@@ -55,7 +55,7 @@ set_vsock_context || { echo "Could not generate crosvm vsock CID" >&2; exit 1; }
 
 # Ensure cleanup on script exit
 trap 'exit ${exit_code}' INT TERM
-trap 'exit_code=$?; [ -z "${SOCAT_PIDS}" ] || kill ${SOCAT_PIDS} >/dev/null 2>&1 || true; rm -rf ${VSOCK_TEMP_DIR}' EXIT
+trap 'exit_code=$?; [ -z "${CROSVM_PID}${SOCAT_PIDS}" ] || kill ${CROSVM_PID} ${SOCAT_PIDS} >/dev/null 2>&1 || true; rm -rf ${VSOCK_TEMP_DIR}' EXIT
 
 # Securely pass the current variables to the crosvm environment
 CI_COMMON="${CI_PROJECT_DIR}"/install/common
@@ -65,16 +65,16 @@ echo "Variables passed through:"
 # Set the crosvm-script as the arguments of the current script
 echo "$@" > ${VSOCK_TEMP_DIR}/crosvm-script.sh
 
+# Setup networking
+/usr/sbin/iptables-legacy -w -t nat -A POSTROUTING -o eth0 -j MASQUERADE
+echo 1 > /proc/sys/net/ipv4/ip_forward
+
 # Start background processes to receive output from guest
 socat -u vsock-connect:${VSOCK_CID}:${VSOCK_STDERR},retry=200,interval=0.1 stderr &
 SOCAT_PIDS=$!
 socat -u vsock-connect:${VSOCK_CID}:${VSOCK_STDOUT},retry=200,interval=0.1 stdout &
 SOCAT_PIDS="${SOCAT_PIDS} $!"
 
-# Setup networking
-/usr/sbin/iptables-legacy -w -t nat -A POSTROUTING -o eth0 -j MASQUERADE
-echo 1 > /proc/sys/net/ipv4/ip_forward
-
 # Prepare to start crosvm
 unset DISPLAY
 unset XDG_RUNTIME_DIR
@@ -91,12 +91,18 @@ crosvm run \
     --shared-dir /:my_root:type=fs:writeback=true:timeout=60:cache=always \
     --host_ip "192.168.30.1" --netmask "255.255.255.0" --mac "AA:BB:CC:00:00:12" \
     --cid ${VSOCK_CID} -p "${CROSVM_KERN_ARGS}" \
-    /lava-files/bzImage > ${VSOCK_TEMP_DIR}/crosvm 2>&1
+    /lava-files/bzImage > ${VSOCK_TEMP_DIR}/crosvm 2>&1 &
 
+# Wait for crosvm process to terminate
+CROSVM_PID=$!
+wait ${CROSVM_PID}
 CROSVM_RET=$?
+unset CROSVM_PID
+
 [ ${CROSVM_RET} -eq 0 ] && {
-    # socat bg processes should terminate as soon as the remote peers exit
+    # socat background processes terminate gracefully on remote peers exit
     wait
+    unset SOCAT_PIDS
     # The actual return code is the crosvm guest script's exit code
     CROSVM_RET=$(cat ${VSOCK_TEMP_DIR}/exit_code 2>/dev/null)
     # Force error when the guest script's exit code is not available