From 4d180be5175e176ec27680557c1d67536a847610 Mon Sep 17 00:00:00 2001
From: Michael Brown
Date: Tue, 20 Jul 2021 13:19:15 +0100
Subject: [cloud] Retry DHCP aggressively in AWS EC2

The DHCP service in EC2 has been observed to occasionally stop
responding for bursts of several seconds. This can easily result in a
failed boot, since the current cloud boot script will attempt DHCP
only once.

Work around this problem by retrying DHCP in a fairly tight cycle
within the cloud boot script, and falling back to a reboot after
several failed DHCP attempts.

Signed-off-by: Michael Brown
---
 src/config/cloud/aws.ipxe | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/config/cloud/aws.ipxe b/src/config/cloud/aws.ipxe
index 66ad77d98..6c007398e 100644
--- a/src/config/cloud/aws.ipxe
+++ b/src/config/cloud/aws.ipxe
@@ -3,7 +3,22 @@
 echo Amazon EC2 - iPXE boot via user-data
 echo CPU: ${cpuvendor} ${cpumodel}
 ifstat ||
-dhcp ||
+
+set attempt:int8 1
+:dhcp_retry
+echo DHCP attempt ${attempt}
+dhcp --timeout 5000 && goto dhcp_ok ||
+ifstat ||
+inc attempt
+iseq ${attempt} 10 || goto dhcp_retry
+
+:dhcp_fail
+echo DHCP failed - rebooting
+reboot ||
+exit
+
+:dhcp_ok
 route ||
 chain -ar http://169.254.169.254/latest/user-data ||
 ifstat ||
+exit
-- 
cgit