Commit | Line | Data |
---|---|---|
d329b32d KS |
#!/usr/bin/bash
#
# Build an LXD image: launch an instance from a source image, provision it
# with the ansible playbooks of the automation repository, publish the
# result and point an image alias at it.

# Shell options are set here rather than on the shebang line so that they
# also apply when the script is started as `bash <script>`.
#   -e  exit on an unhandled command failure
#   -u  treat expansion of unset variables as an error
#   -x  trace every command
set -eux

# Stack of command lines run (last registered first) by cleanup()
CLEANUP=()
4 | ||
# Run every command line registered in the global CLEANUP array, most
# recently registered first, then empty the array so that a second call
# is a no-op.
#
# Globals:   CLEANUP (read, then reset to an empty array)
# Arguments: none
#
# Failures of individual cleanup commands are ignored so that the remaining
# entries still run. The errexit option is restored to the state it had
# when the function was entered.
function cleanup {
    local index
    local errexit_was_on=0

    if [[ $- == *e* ]] ; then
        errexit_was_on=1
    fi

    set +e
    for (( index=${#CLEANUP[@]}-1 ; index >= 0 ; index-- )) ; do
        # Entries are complete command lines stored as a single string, the
        # unquoted expansion is what splits them into command + arguments.
        # shellcheck disable=SC2086
        ${CLEANUP[index]}
    done
    CLEANUP=()

    if [[ "${errexit_was_on}" -eq 1 ]] ; then
        set -e
    fi
}
13 | ||
# Abort the script: run the registered cleanup commands, print a reason on
# stderr and exit.
#
# Arguments: $1 - exit code (default: 1)
#            $2 - message printed on stderr (default: "Unknown reason")
# Outputs:   the message, on stderr, after cleanup has run
# Returns:   never returns; exits the shell with the given code
function fail {
    local code="${1:-1}"
    local reason="${2:-Unknown reason}"

    cleanup
    # printf rather than echo so that a reason such as "-n" is printed as-is
    printf '%s\n' "${reason}" >&2
    exit "${code}"
}
21 | ||
# Run the cleanup stack on every exit path.
trap cleanup EXIT
# A trap handler that merely returns lets the script carry on after the
# signal, so TERM and INT exit explicitly (128 + signal number). The EXIT
# trap above then takes care of the cleanup.
trap 'exit 143' TERM
trap 'exit 130' INT

# Dump the environment the script was started with
env
25 | ||
# Variables that must be set in the environment. All missing ones are
# reported before aborting, so a misconfigured job is fixed in one pass.
REQUIRED_VARIABLES=(
    OS                    # OS name
    RELEASE               # OS release
    ARCH                  # The image architecture
    IMAGE_TYPE            # The image type to create
    VARIANT               # The variant of the base image to use
    PROFILE               # The ansible group to apply to the new image
    GIT_BRANCH            # The git branch of the automation repo to checkout
    GIT_URL               # The git URL of the automation repo to checkout
    LXD_HOST              # LXD server added as the 'ci' remote
    LXD_INSTANCE_PROFILE  # LXD profile applied (after 'default') to launched instances
    LXD_CLIENT_CERT       # Path to LXD client certificate
    LXD_CLIENT_KEY        # Path to LXD client certificate key
    SSH_PRIVATE_KEY       # Path to SSH private key
    TEST                  # 'true' to test launching published image
)
MISSING_VARS=0
for var in "${REQUIRED_VARIABLES[@]}" ; do
    # -v tests whether the variable *named* by ${var} is set; a variable
    # set to the empty string counts as present.
    if [[ ! -v "${var}" ]] ; then
        MISSING_VARS=1
        echo "Missing required variable: '${var}'" >&2
    fi
done
if [[ "${MISSING_VARS}" != "0" ]] ; then
    fail 1 "Missing required variables"
fi
50 | ||
# Optional settings: keep a non-empty value from the environment, fall back
# to the default otherwise.
#   INSTANCE_START_TIMEOUT  used as the budget of the 'lxc exec' wait loop
#   NETWORK_SLEEP           seconds slept before looking up the instance IP
: "${INSTANCE_START_TIMEOUT:=120}"
: "${NETWORK_SLEEP:=15}"

# Tools this script relies on
apt-get install --yes lxd-client ansible jq
57 | ||
# Configuration
#
# The removals are registered before the credentials are copied, so that a
# failure part-way through (e.g. the second cp) does not leave the client
# certificate or key behind.
CLEANUP+=(
    "rm -f ${HOME}/.config/lxc/client.crt"
    "rm -f ${HOME}/.config/lxc/client.key"
)
mkdir -p "${HOME}/.config/lxc"
cp "${LXD_CLIENT_CERT}" "${HOME}/.config/lxc/client.crt"
cp "${LXD_CLIENT_KEY}" "${HOME}/.config/lxc/client.key"

# Register the LXD server as remote 'ci' and make it the default remote
lxc remote add ci --accept-certificate --auth-type tls "${LXD_HOST}"
lxc remote switch ci
68 | ||
# Check out the requested branch of the automation repository into ./ci
# and work from its ansible directory for the rest of the script.
git clone --branch "${GIT_BRANCH}" "${GIT_URL}" ci
if ! cd ci/automation/ansible ; then
    exit 1
fi
72 | ||
# Name of the image the build instance is launched from
SOURCE_IMAGE_NAME="${OS}/${RELEASE}/${VARIANT}/${ARCH}"
# Alias given to the published image. IMAGE_TYPE is part of it because an
# alias may only be defined once, even if the type of the image differs.
TARGET_IMAGE_NAME="${SOURCE_IMAGE_NAME}/${PROFILE}/${IMAGE_TYPE}"
# Filled in once the instance has been launched
INSTANCE_NAME=''
# Extra 'lxc launch' arguments: virtual machines need --vm, any other image
# type needs nothing.
case "${IMAGE_TYPE}" in
    vm)
        VM_ARG=("--vm")
        ;;
    *)
        VM_ARG=()
        ;;
esac
83 | ||
# Launch the build instance.
#
# It's possible that concurrent image creation when running parallel jobs causes
# an error during the launch:
#   Error: Failed instance creation: UNIQUE constraint failed: images.project_id, images.fingerprint
# C.f. https://github.com/canonical/lxd/issues/11636
#
# The launch is therefore retried up to TRIES_MAX times, with a short random
# pause between attempts so that parallel jobs do not retry in lockstep.
set +e
TRIES_MAX=3
TRIES=0
while true ; do
    # Try from local cache
    if INSTANCE_NAME=$(lxc -q launch "${VM_ARG[@]}" -p default -p "${LXD_INSTANCE_PROFILE}" "${SOURCE_IMAGE_NAME}/${IMAGE_TYPE}") ; then
        break
    fi
    # Try from images
    if INSTANCE_NAME=$(lxc -q launch "${VM_ARG[@]}" -p default -p "${LXD_INSTANCE_PROFILE}" images:"${SOURCE_IMAGE_NAME}") ; then
        break
    fi
    TRIES=$((TRIES + 1))
    echo "Failed to deployed ephemereal instance attempt ${TRIES}/${TRIES_MAX}"
    if [[ "${TRIES}" -ge "${TRIES_MAX}" ]] ; then
        fail 1 "Failed to deploy ephemereal instance"
    fi
    sleep $((1 + RANDOM % 10))
done
# The launch output has the form '<text>: <name>'; keep what follows the colon
INSTANCE_NAME="$(echo "${INSTANCE_NAME}" | cut -d ':' -f 2 | tr -d ' ')"
set -e

# Without a name every later 'lxc' call, including the cleanup ones, would
# be run without a target.
if [[ -z "${INSTANCE_NAME}" ]] ; then
    fail 1 "Failed to determine the name of the launched instance"
fi

# Cleanup entries run in reverse order: stop first, then delete
CLEANUP+=(
    "lxc delete -f ${INSTANCE_NAME}"
    "lxc stop ${INSTANCE_NAME}"
)
117 | ||
# VMs may take more time to start: poll once per second until running
# 'hostname' through 'lxc exec' answers with the instance name. The budget
# is INSTANCE_START_TIMEOUT; running out of it aborts the script.
TIME_REMAINING="${INSTANCE_START_TIMEOUT}"
until
    # 'lxc exec' is expected to fail while the instance is still booting
    set +e
    INSTANCE_STATUS=$(lxc exec "${INSTANCE_NAME}" hostname)
    set -e
    [[ "${INSTANCE_STATUS}" == "${INSTANCE_NAME}" ]]
do
    sleep 1
    TIME_REMAINING=$((TIME_REMAINING - 1))
    if [[ "${TIME_REMAINING}" -lt 0 ]] ; then
        fail 1 "Timed out waiting for instance to become available via 'lxc exec'"
    fi
done
133 | ||
# Cloud variants: block until cloud-init reports completion.
case "${VARIANT}" in
    cloud)
        # It's possible for cloud-init to fail, but to still be able to continue.
        # Eg., a profile asks for netplan.io on a system that doesn't have that
        # package available. Its exit status is therefore ignored.
        lxc exec "${INSTANCE_NAME}" -- cloud-init status -w || true
        ;;
esac

# Give the instance time to obtain an ip address
# (@TODO: is there a better approach?)
sleep "${NETWORK_SLEEP}"
144 | ||
# Determine the IPv4 address of the instance by probing the interface names
# listed in POTENTIAL_INTERFACES, in order.
#
# @TODO: Handle case when iproute2 (the 'ip' command) is not installed
INSTANCE_IP=''
POTENTIAL_INTERFACES=(eth0 enp5s0)
# Full address listing, for the job log
lxc exec "${INSTANCE_NAME}" -- ip a
set +e
for interface in "${POTENTIAL_INTERFACES[@]}" ; do
    # A missing interface makes 'ip' fail: try the next candidate
    if ! DEV_INFO="$(lxc exec "${INSTANCE_NAME}" -- ip a show dev "${interface}")" ; then
        continue
    fi
    # Keep only the first address: an interface carrying several IPv4
    # addresses would otherwise produce a multi-line value, which is not
    # usable as a host name further down.
    INSTANCE_IP="$(echo "${DEV_INFO}" | grep -Eo 'inet [^ ]* ' | cut -d' ' -f2 | cut -d'/' -f1 | head -n 1)"
    if [[ "${INSTANCE_IP}" != "" ]] ; then
        break
    fi
done
set -e
if [[ "${INSTANCE_IP}" == "" ]] ; then
    fail 1 "Failed to determine instance IP address"
fi
163 | ||
# SSH access to the instance.
#
# Every file is registered for removal *before* it is created, so that a
# failure between creation and registration cannot leave it (in particular
# the private key) behind.
mkdir -p -m 700 "${HOME}/.ssh"

CLEANUP+=(
    "rm -f ${HOME}/.ssh/known_hosts2"
)
ssh-keyscan "${INSTANCE_IP}" >> "${HOME}/.ssh/known_hosts2"
#lxc exec "${INSTANCE_NAME}" -- bash -c 'for i in /etc/ssh/ssh_host_*_key ; do ssh-keygen -l -f "$i" ; done' >> "${HOME}/.ssh/known_hosts"

CLEANUP+=(
    "rm -f ${HOME}/.ssh/id_rsa.pub"
    "rm -f ${HOME}/.ssh/id_rsa"
)
cp "${SSH_PRIVATE_KEY}" "${HOME}/.ssh/id_rsa"
# ssh refuses to use a private key that other users can read
chmod 600 "${HOME}/.ssh/id_rsa"
# Derive the public key and authorize it for root in the instance
ssh-keygen -f "${HOME}/.ssh/id_rsa" -y > "${HOME}/.ssh/id_rsa.pub"
lxc file push "${HOME}/.ssh/id_rsa.pub" "ci:${INSTANCE_NAME}/root/.ssh/authorized_keys2"
# Some distros, eg. Rocky Linux, don't enable the use of authorized_keys2
# by default
lxc exec "ci:${INSTANCE_NAME}" -- bash -c 'if test -f /etc/redhat-release ; then sed -i "s#^AuthorizedKeysFile.*#AuthorizedKeysFile .ssh/authorized_keys .ssh/authorized_keys2#" /etc/ssh/sshd_config ; systemctl restart sshd ; fi'

# Confirm working SSH connection
if ! ssh "${INSTANCE_IP}" hostname ; then
    fail 1 "Unable to reach ephemereal instance over SSH"
fi
185 | ||
# Run playbook
#
# A throw-away inventory places the instance in the group named after
# PROFILE. Every '-' of the profile name is mapped to '_' to obtain the
# group name. The removal is registered before the file is written.
CLEANUP+=(
    "rm -f $(pwd)/fake-inventory"
)
cat > fake-inventory <<EOF
[${PROFILE//-/_}]
${INSTANCE_IP}
EOF

LANG=C ANSIBLE_STRATEGY=linear ansible-playbook site.yml \
    -e '{"compilers_legacy_install": false, "jenkins_user": false, "lttng_modules_checkout_repo": false}' \
    -l "${INSTANCE_IP}" -i fake-inventory

# Cleanup instance side
LANG=C ANSIBLE_STRATEGY=linear ansible-playbook \
    playbooks/post-imagebuild-clean.yml \
    -l "${INSTANCE_IP}" -i fake-inventory
d329b32d | 203 | |
953731b0 KS |
# Graceful shutdown
lxc stop "${INSTANCE_NAME}"

# Publish
#
# The complete output of 'lxc publish' is captured so that it can be shown
# when no fingerprint is found in it. Success is decided by the presence of
# a fingerprint (64 hexadecimal characters) in that output, not by the exit
# status of 'lxc publish', hence the '|| true'.
PUBLISH_OUTPUT="$(lxc publish "${INSTANCE_NAME}" 2>&1)" || true
# Only the first match is kept, so that FINGERPRINT is a single line
FINGERPRINT="$(grep -E -o '[A-Fa-f0-9]{64}' <<< "${PUBLISH_OUTPUT}" | head -n 1)"
if [[ -n "${FINGERPRINT}" ]] ; then
    echo "Published instance with fingerprint '${FINGERPRINT}'"
else
    echo "${PUBLISH_OUTPUT}" >&2
    fail 1 "No fingerprint for published instance"
fi
d329b32d KS |
213 | |
# Optional smoke test: when TEST is 'true', launch an instance (with -e)
# from the image that was just published. Up to TRIES_MAX attempts are made,
# separated by a random pause of 1 to 10 seconds.
TRIES=0

if [[ "${TEST}" == "true" ]] ; then
    set +e
    while [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; do
        if INSTANCE_NAME=$(lxc -q launch -e "${VM_ARG[@]}" -p default -p "${LXD_INSTANCE_PROFILE}" "${FINGERPRINT}") ; then
            # Keep what follows the colon in the launch output, and make
            # sure the test instance is stopped when the script ends.
            INSTANCE_NAME="$(echo "${INSTANCE_NAME}" | cut -d':' -f2 | tr -d ' ')"
            CLEANUP+=(
                "lxc stop -f ${INSTANCE_NAME}"
            )
            break
        fi

        TRIES=$((TRIES + 1))
        echo "Failed to launch instance try ${TRIES}/${TRIES_MAX}"
        if [[ "${TRIES}" -ge "${TRIES_MAX}" ]] ; then
            fail 1 "Failed to launch an instance using newly published image '${FINGERPRINT}'"
        fi
        sleep $((1 + RANDOM % 10))
    done
    set -e
fi
9611d4c7 KS |
237 | |
# Point the target alias at the newly published image. A failure to delete
# the previous alias is ignored (presumably it may not exist yet); a
# failure to create the new one is not.
if ! lxc image alias delete "${TARGET_IMAGE_NAME}" ; then
    true
fi
lxc image alias create "${TARGET_IMAGE_NAME}" "${FINGERPRINT}"