From fe47937c7e849d1665f729089363339db25ec888 Mon Sep 17 00:00:00 2001
From: Kienan Stewart
Date: Wed, 17 Jan 2024 14:04:46 -0500
Subject: [PATCH] ansible: Add cron job to reboot armhf nodes that have gone read-only

Change-Id: I4f2c38244bf4135600e1e33bf9c4fe76be1de740
Signed-off-by: Kienan Stewart
---
 automation/ansible/group_vars/node_armhf.yml  |  6 ++++++
 .../common-node/files/readonly_root_reboot.sh | 15 +++++++++++++++
 .../ansible/roles/common-node/tasks/main.yml  | 17 +++++++++++++++++
 3 files changed, 38 insertions(+)
 create mode 100644 automation/ansible/group_vars/node_armhf.yml
 create mode 100755 automation/ansible/roles/common-node/files/readonly_root_reboot.sh

diff --git a/automation/ansible/group_vars/node_armhf.yml b/automation/ansible/group_vars/node_armhf.yml
new file mode 100644
index 0000000..27c2931
--- /dev/null
+++ b/automation/ansible/group_vars/node_armhf.yml
@@ -0,0 +1,6 @@
+---
+# The SATA controllers on the boards that are running the armhf
+# nodes are somewhat flaky, and the root filesystem often ends
+# up read-only due to errors. When the FS goes read-only, jobs
+# will fail so the easiest action is to have the node reboot.
+common_node_rootfs_readonly_reboot: true
diff --git a/automation/ansible/roles/common-node/files/readonly_root_reboot.sh b/automation/ansible/roles/common-node/files/readonly_root_reboot.sh
new file mode 100755
index 0000000..4ce1091
--- /dev/null
+++ b/automation/ansible/roles/common-node/files/readonly_root_reboot.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Reboot the node when '/' is mounted read-only; meant to run from cron as root.
+
+# cron may provide a minimal PATH without the sbin directories (shutdown).
+PATH="/usr/sbin:/sbin:${PATH}"
+
+# Read the mount options straight from findmnt; no JSON parsing, so no
+# dependency on jq being installed on the node.
+OPTIONS="$(findmnt --first-only --noheadings --output OPTIONS /)"
+
+# Comma-wrap the list so 'ro' matches only as a whole option.
+if [[ ",${OPTIONS}," == *",ro,"* ]] ; then
+    echo "'/' is mounted read-only, rebooting"
+    shutdown -r "+1"
+fi
diff --git a/automation/ansible/roles/common-node/tasks/main.yml b/automation/ansible/roles/common-node/tasks/main.yml
index 177603e..28f2a27 100644
--- a/automation/ansible/roles/common-node/tasks/main.yml
+++ b/automation/ansible/roles/common-node/tasks/main.yml
@@ -20,3 +20,20 @@
 
 - include: setup-Suse.yml
   when: ansible_os_family == 'Suse'
+
+- when: common_node_rootfs_readonly_reboot|default(false)
+  block:
+    - name: Install readonly reboot script
+      ansible.builtin.copy:
+        dest: '/usr/sbin/readonly_root_reboot.sh'
+        src: 'readonly_root_reboot.sh'
+        owner: 'root'
+        group: 'root'
+        mode: '0750'
+    - name: Add cronjob
+      ansible.builtin.cron:
+        name: 'readonly_root_reboot'
+        job: '/usr/sbin/readonly_root_reboot.sh'
+        cron_file: 'readonly_root_reboot'
+        minute: '*/10'
+        user: 'root'
-- 
2.34.1