From 4d180be5175e176ec27680557c1d67536a847610 Mon Sep 17 00:00:00 2001
From: Michael Brown <mcb30@ipxe.org>
Date: Tue, 20 Jul 2021 13:19:15 +0100
Subject: [cloud] Retry DHCP aggressively in AWS EC2

The DHCP service in EC2 has been observed to occasionally stop
responding for bursts of several seconds.  This can easily result in a
failed boot, since the current cloud boot script will attempt DHCP
only once.

Work around this problem by retrying DHCP in a fairly tight cycle
within the cloud boot script, and falling back to a reboot after
several failed DHCP attempts.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
---
 src/config/cloud/aws.ipxe | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/config/cloud/aws.ipxe b/src/config/cloud/aws.ipxe
index 66ad77d98..6c007398e 100644
--- a/src/config/cloud/aws.ipxe
+++ b/src/config/cloud/aws.ipxe
@@ -3,7 +3,29 @@
 echo Amazon EC2 - iPXE boot via user-data
 echo CPU: ${cpuvendor} ${cpumodel}
 ifstat ||
-dhcp ||
+
+# Retry DHCP in a tight loop, since a single attempt may fail if the
+# EC2 DHCP service is briefly unresponsive.
+set attempt:int8 1
+:dhcp_retry
+echo DHCP attempt ${attempt}
+# On success, skip ahead; the trailing "||" keeps the script running
+# after a failed attempt.
+dhcp --timeout 5000 && goto dhcp_ok ||
+ifstat ||
+inc attempt
+# The counter starts at 1, so the loop ends after attempt 9 has failed.
+iseq ${attempt} 10 || goto dhcp_retry
+
+# All attempts failed: reboot, or exit the script if reboot returns.
+:dhcp_fail
+echo DHCP failed - rebooting
+reboot ||
+exit
+
+# DHCP succeeded: boot via the script supplied as EC2 user-data.
+:dhcp_ok
 route ||
 chain -ar http://169.254.169.254/latest/user-data ||
 ifstat ||
+exit
-- 
cgit