| 1 | #!/usr/bin/bash -eux |
| 2 | |
| 3 | CLEANUP=() |
| 4 | |
| 5 | function cleanup { |
| 6 | set +e |
| 7 | for (( index=${#CLEANUP[@]}-1 ; index >= 0 ; index-- )) ;do |
| 8 | ${CLEANUP[$index]} |
| 9 | done |
| 10 | CLEANUP=() |
| 11 | set -e |
| 12 | } |
| 13 | |
| 14 | function fail { |
| 15 | CODE="${1:-1}" |
| 16 | REASON="${2:-Unknown reason}" |
| 17 | cleanup |
| 18 | echo "${REASON}" >&2 |
| 19 | exit "${CODE}" |
| 20 | } |
| 21 | |
| 22 | trap cleanup EXIT TERM INT |
| 23 | |
| 24 | env |
| 25 | |
| 26 | REQUIRED_VARIABLES=( |
| 27 | OS # OS name |
| 28 | RELEASE # OS release |
| 29 | ARCH # The image architecture |
| 30 | IMAGE_TYPE # The image type to create |
| 31 | VARIANT # The variant of the base image to use |
| 32 | PROFILE # The ansible group to apply to the new image |
| 33 | GIT_BRANCH # The git branch of the automation repo to checkout |
| 34 | GIT_URL # The git URL of the automation repo to checkout |
| 35 | INCUS_CLIENT_CERT # Path to INCUS client certificate |
| 36 | INCUS_CLIENT_KEY # Path to INCUS client certificate key |
| 37 | SSH_PRIVATE_KEY # Path to SSH private key |
| 38 | TEST # 'true' to test launching published image |
| 39 | ) |
| 40 | MISSING_VARS=0 |
| 41 | for var in "${REQUIRED_VARIABLES[@]}" ; do |
| 42 | if [ ! -v "$var" ] ; then |
| 43 | MISSING_VARS=1 |
| 44 | echo "Missing required variable: '${var}'" >&2 |
| 45 | fi |
| 46 | done |
| 47 | if [[ ! "${MISSING_VARS}" == "0" ]] ; then |
| 48 | fail 1 "Missing required variables" |
| 49 | fi |
| 50 | |
| 51 | # Default optional variables |
| 52 | INSTANCE_START_TIMEOUT="${INSTANCE_START_TIMEOUT:-120}" |
| 53 | NETWORK_SLEEP="${NETWORK_SLEEP:-15}" |
| 54 | |
| 55 | # Dependencies |
| 56 | apt-get -y install incus-client ansible jq |
| 57 | |
| 58 | # Configuration |
| 59 | mkdir -p ~/.config/incus |
| 60 | cp "${INCUS_CLIENT_CERT}" ~/.config/incus/client.crt |
| 61 | cp "${INCUS_CLIENT_KEY}" ~/.config/incus/client.key |
| 62 | CLEANUP+=( |
| 63 | "rm -f ${HOME}/.config/incus/client.crt" |
| 64 | "rm -f ${HOME}/.config/incus/client.key" |
| 65 | ) |
| 66 | incus remote add ci --accept-certificate --auth-type tls "${INCUS_HOST}" |
| 67 | incus remote switch ci |
| 68 | |
| 69 | # Clone lttng-ci |
| 70 | git clone -b "${GIT_BRANCH}" "${GIT_URL}" ci |
| 71 | cd ci/automation/ansible || exit 1 |
| 72 | |
| 73 | SOURCE_IMAGE_NAME="${OS}/${RELEASE}/${VARIANT}/${ARCH}" |
| 74 | # Include IMAGE_TYPE since an alias may only be defined once even if the |
| 75 | # type of the image differs |
| 76 | TARGET_IMAGE_NAME="${OS}/${RELEASE}/${VARIANT}/${ARCH}/${PROFILE}/${IMAGE_TYPE}" |
| 77 | INSTANCE_NAME='' |
| 78 | # Try from local cache |
| 79 | VM_ARG=() |
| 80 | if [ "${IMAGE_TYPE}" == "vm" ] ; then |
| 81 | VM_ARG=("--vm") |
| 82 | fi |
| 83 | |
| 84 | set +e |
| 85 | # Test |
| 86 | # It's possible that concurrent image creation when running parallel jobs causes |
| 87 | # an error during the launch: |
| 88 | # Error: Failed instance creation: UNIQUE constraint failed: images.project_id, images.fingerprint |
| 89 | # C.f. https://github.com/canonical/lxd/issues/11636 |
| 90 | # |
| 91 | TRIES_MAX=3 |
| 92 | TRIES=0 |
| 93 | while [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; do |
| 94 | if ! INSTANCE_NAME=$(incus -q launch "${VM_ARG[@]}" -p default -p "${INCUS_INSTANCE_PROFILE}" "${SOURCE_IMAGE_NAME}/${IMAGE_TYPE}") ; then |
| 95 | # Try from images |
| 96 | if ! INSTANCE_NAME=$(incus -q launch "${VM_ARG[@]}" -p default -p "${INCUS_INSTANCE_PROFILE}" images:"${SOURCE_IMAGE_NAME}") ; then |
| 97 | TRIES=$((TRIES + 1)) |
| 98 | echo "Failed to deployed ephemereal instance attempt ${TRIES}/${TRIES_MAX}" |
| 99 | if [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; then |
| 100 | continue |
| 101 | fi |
| 102 | fail 1 "Failed to deploy ephemereal instance" |
| 103 | else |
| 104 | break |
| 105 | fi |
| 106 | else |
| 107 | break |
| 108 | fi |
| 109 | done |
| 110 | INSTANCE_NAME="$(echo "${INSTANCE_NAME}" | cut -d ':' -f 2 | tr -d ' ')" |
| 111 | set -e |
| 112 | |
| 113 | CLEANUP+=( |
| 114 | "incus delete -f ${INSTANCE_NAME}" |
| 115 | "incus stop ${INSTANCE_NAME}" |
| 116 | ) |
| 117 | |
| 118 | # VMs may take more time to start, wait until instance is running |
| 119 | TIME_REMAINING="${INSTANCE_START_TIMEOUT}" |
| 120 | while true ; do |
| 121 | set +e |
| 122 | INSTANCE_STATUS=$(incus exec "${INSTANCE_NAME}" hostname) |
| 123 | set -e |
| 124 | if [[ "${INSTANCE_STATUS}" == "${INSTANCE_NAME}" ]] ; then |
| 125 | break |
| 126 | fi |
| 127 | sleep 1 |
| 128 | TIME_REMAINING=$((TIME_REMAINING - 1)) |
| 129 | if [ "${TIME_REMAINING}" -lt "0" ] ; then |
| 130 | fail 1 "Timed out waiting for instance to become available via 'incus exec'" |
| 131 | fi |
| 132 | done |
| 133 | |
| 134 | # Wait for cloud-init to finish |
| 135 | if [[ "${VARIANT}" == "cloud" ]] ; then |
| 136 | # It's possible for cloud-init to fail, but to still be able to continue. |
| 137 | # Eg., a profile asks for netplan.io on a system that doesn't have that |
| 138 | # package available. |
| 139 | incus exec "${INSTANCE_NAME}" -- cloud-init status -w || true |
| 140 | fi |
| 141 | |
| 142 | # Wait for instance to have an ip address (@TODO: is there a better approach?) |
| 143 | sleep "${NETWORK_SLEEP}" |
| 144 | |
| 145 | # @TODO: Handle case when iputils2 is not installed |
| 146 | INSTANCE_IP='' |
| 147 | POTENTIAL_INTERFACES=(eth0 enp5s0) |
| 148 | incus exec "${INSTANCE_NAME}" -- ip a |
| 149 | set +e |
| 150 | for interface in "${POTENTIAL_INTERFACES[@]}" ; do |
| 151 | if ! DEV_INFO="$(incus exec "${INSTANCE_NAME}" -- ip a show dev "${interface}")" ; then |
| 152 | continue |
| 153 | fi |
| 154 | INSTANCE_IP="$(echo "${DEV_INFO}" | grep -Eo 'inet [^ ]* ' | cut -d' ' -f2 | cut -d'/' -f1)" |
| 155 | if [[ "${INSTANCE_IP}" != "" ]] ; then |
| 156 | break |
| 157 | fi |
| 158 | done |
| 159 | set -e |
| 160 | if [[ "${INSTANCE_IP}" == "" ]] ; then |
| 161 | fail 1 "Failed to determine instance IP address" |
| 162 | fi |
| 163 | |
| 164 | ssh-keyscan "${INSTANCE_IP}" >> ~/.ssh/known_hosts2 |
| 165 | #incus exec "${INSTANCE_NAME}" -- bash -c 'for i in /etc/ssh/ssh_host_*_key ; do ssh-keygen -l -f "$i" ; done' >> "${HOME}/.ssh/known_hosts" |
| 166 | CLEANUP+=( |
| 167 | "rm -f ${HOME}/.ssh/known_hosts2" |
| 168 | ) |
| 169 | cp "${SSH_PRIVATE_KEY}" ~/.ssh/id_rsa |
| 170 | ssh-keygen -f ~/.ssh/id_rsa -y > ~/.ssh/id_rsa.pub |
| 171 | CLEANUP+=( |
| 172 | "rm -f ${HOME}/.ssh/id_rsa.pub" |
| 173 | "rm -f ${HOME}/.ssh/id_rsa" |
| 174 | ) |
| 175 | incus file push ~/.ssh/id_rsa.pub "ci:${INSTANCE_NAME}/root/.ssh/authorized_keys2" |
| 176 | # Some distros, eg. Rocky Linux, don't enable the use of authorized_keys2 |
| 177 | # by default |
| 178 | incus exec "ci:${INSTANCE_NAME}" -- bash -c 'if test -f /etc/redhat-release ; then sed -i "s#^AuthorizedKeysFile.*#AuthorizedKeysFile .ssh/authorized_keys .ssh/authorized_keys2#" /etc/ssh/sshd_config ; systemctl restart sshd ; fi' |
| 179 | |
| 180 | |
| 181 | # Confirm working SSH connection |
| 182 | if ! ssh "${INSTANCE_IP}" hostname ; then |
| 183 | fail 1 "Unable to reach ephemereal instance over SSH" |
| 184 | fi |
| 185 | |
| 186 | # Run playbook |
| 187 | cat > fake-inventory <<EOF |
| 188 | [${PROFILE/-/_}] |
| 189 | ${INSTANCE_IP} |
| 190 | EOF |
| 191 | CLEANUP+=( |
| 192 | "rm -f $(pwd)/fake-inventory" |
| 193 | ) |
| 194 | |
| 195 | LANG=C ANSIBLE_STRATEGY=linear ansible-playbook site.yml \ |
| 196 | -e '{"compilers_legacy_install": false, "jenkins_user": false, "lttng_modules_checkout_repo": false}' \ |
| 197 | -l "${INSTANCE_IP}" -i fake-inventory |
| 198 | |
| 199 | # Cleanup instance side |
| 200 | LANG=C ANSIBLE_STRATEGY=linear ansible-playbook \ |
| 201 | playbooks/post-imagebuild-clean.yml \ |
| 202 | -l "${INSTANCE_IP}" -i fake-inventory |
| 203 | |
| 204 | # Graceful shutdown |
| 205 | incus stop "${INSTANCE_NAME}" |
| 206 | |
| 207 | # Publish |
| 208 | if FINGERPRINT=$(incus publish "${INSTANCE_NAME}" 2>&1 | grep -E -o '[A-Fa-f0-9]{64}') ; then |
| 209 | echo "Published instance with fingerprint '${FINGERPRINT}'" |
| 210 | else |
| 211 | fail 1 "No fingerprint for published instance" |
| 212 | fi |
| 213 | |
| 214 | TRIES=0 |
| 215 | |
| 216 | if [[ "${TEST}" == "true" ]] ; then |
| 217 | set +e |
| 218 | while [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; do |
| 219 | if ! INSTANCE_NAME=$(incus -q launch -e "${VM_ARG[@]}" -p default -p "${INCUS_INSTANCE_PROFILE}" "${FINGERPRINT}") ; then |
| 220 | TRIES=$((TRIES + 1)) |
| 221 | echo "Failed to launch instance try ${TRIES}/${TRIES_MAX}" |
| 222 | if [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; then |
| 223 | sleep $((1 + RANDOM % 10)) |
| 224 | continue |
| 225 | fi |
| 226 | fail 1 "Failed to launch an instance using newly published image '${FINGERPRINT}'" |
| 227 | else |
| 228 | INSTANCE_NAME="$(echo "${INSTANCE_NAME}" | cut -d':' -f2 | tr -d ' ')" |
| 229 | CLEANUP+=( |
| 230 | "incus stop -f ${INSTANCE_NAME}" |
| 231 | ) |
| 232 | break |
| 233 | fi |
| 234 | done |
| 235 | set -e |
| 236 | fi |
| 237 | |
| 238 | incus image alias delete "${TARGET_IMAGE_NAME}" || true |
| 239 | incus image alias create "${TARGET_IMAGE_NAME}" "${FINGERPRINT}" |