diff --git a/tools/nocloud-factory-install/factory-install/systemd/utils/send-factory-sel-event b/tools/nocloud-factory-install/factory-install/systemd/utils/send-factory-sel-event
index 7c196577..eedbf82d 100644
--- a/tools/nocloud-factory-install/factory-install/systemd/utils/send-factory-sel-event
+++ b/tools/nocloud-factory-install/factory-install/systemd/utils/send-factory-sel-event
@@ -11,17 +11,17 @@ log_info() { echo "$(date '+%F %H:%M:%S') INFO: $*"; }
 log_warn() { echo "$(date '+%F %H:%M:%S') WARN: $*"; }
 
 declare -Ar CODES=(
-    [bootstrap.ok]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xd1"
-    [bootstrap.err]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xd2"
-    [config.ok]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xd3"
-    [config.err]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xd4"
-    [setup.ok]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xd5"
-    [setup.err]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xd6"
-    [tests.ok]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xd7"
-    [tests.err]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xd8"
-    [backup.ok]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xd9"
-    [backup.err]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xda"
-    [finished.ok]="0x04 0xF0 0x01 0x6f 0xff 0xff 0xe0"
+    [bootstrap.ok]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xF6"
+    [bootstrap.err]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xF7"
+    [config.ok]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xF8"
+    [config.err]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xF9"
+    [setup.ok]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xFA"
+    [setup.err]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xFB"
+    [tests.ok]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xFC"
+    [tests.err]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xFD"
+    [backup.ok]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xFE"
+    [backup.err]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xFF"
+    [finished.ok]="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE0"
 )
 
 stage="${1:-}"; status="${2:-}"
diff --git a/tools/nocloud-factory-install/seed-config/run-cloud-init-from-seed.sh b/tools/nocloud-factory-install/seed-config/run-cloud-init-from-seed.sh
index d904aab3..de9bd569 100755
--- a/tools/nocloud-factory-install/seed-config/run-cloud-init-from-seed.sh
+++ b/tools/nocloud-factory-install/seed-config/run-cloud-init-from-seed.sh
@@ -17,6 +17,10 @@ SEED_SERVICE="/etc/systemd/system/cloud-init-seed.service"
 SEED_NETWORK_CFG="network-config"
 NETWORK_CFG_FILE="/run/.$SEED_NETWORK_CFG"
 CLOUD_INIT_IF_FILE="/etc/network/interfaces.d/50-cloud-init"
+readonly EVENT_FACTORY_SETUP_COMPLETE="factory_setup_complete"
+readonly EVENT_FACTORY_SETUP_FAILED="factory_setup_failed"
+readonly DATA_FACTORY_SETUP_COMPLETE="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE0 # \"Factory Setup Complete\""
+readonly DATA_FACTORY_SETUP_FAILED="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE1 # \"Factory Setup Failed\""
 
 function check_rc_die {
     local -i rc=${1}
@@ -59,6 +63,46 @@ flock -n 200 || {
     exit 0
 }
 
+# Record a factory-setup result in the BMC System Event Log via ipmitool.
+# Arguments: $1 - event name, one of the EVENT_FACTORY_SETUP_* constants
+# Returns:   0 when ipmitool accepted the event; 1 on an unknown event name,
+#            a temp-file failure, or an ipmitool failure
+function send_ipmi_event {
+    local event_type="$1"
+    local event_data
+    local temp_file
+    local -i rc=0
+
+    case "$event_type" in
+        "$EVENT_FACTORY_SETUP_COMPLETE") event_data="$DATA_FACTORY_SETUP_COMPLETE" ;;
+        "$EVENT_FACTORY_SETUP_FAILED") event_data="$DATA_FACTORY_SETUP_FAILED" ;;
+        *)
+            log_warn "Unknown IPMI event type: $event_type"
+            return 1
+            ;;
+    esac
+
+    # Assign separately from `local` so a mktemp failure is detected instead
+    # of being masked by the exit status of the declaration.
+    if ! temp_file=$(mktemp /tmp/ipmi_event_XXXXXX.txt); then
+        log_warn "Failed to create temp file for IPMI event: $event_type"
+        return 1
+    fi
+    echo "$event_data" > "$temp_file"
+
+    # ipmitool reads the event definition from the file; its stderr is
+    # discarded and the outcome is reported through the log instead.
+    if ipmitool sel add "$temp_file" 2>/dev/null; then
+        log_info "IPMI event sent successfully: $event_type"
+    else
+        log_warn "Failed to send IPMI event: $event_type"
+        rc=1
+    fi
+
+    rm -f "$temp_file"
+    return "$rc"
+}
+
 # If clean is passed as an argument, remove the udev rule and service,
 # the custom cloud.cfg file, and the script itself.
 # This is to ensure that the cloud-init-seed service is not triggered
@@ -78,8 +122,10 @@ log_info "Starting cloud-init using seed ISO..."
 # Checks if factory-install has been completed. This is required to be able
 # to run cloud-init from a seed ISO.
 if [[ ! -f "$FACTORY_INSTALL_COMPLETE_FILE" ]]; then
-    log_fatal "Cloud-init from factory-install has not been completed yet. Exiting."
+    send_ipmi_event "$EVENT_FACTORY_SETUP_FAILED"
+    log_fatal "$FACTORY_INSTALL_COMPLETE_FILE does not exist. Ensure factory-install was successful."
 fi
+send_ipmi_event "$EVENT_FACTORY_SETUP_COMPLETE"
 
 # Finds the first device found with the label CIDATA or cidata.
 # If the device is not found, exit the script.
diff --git a/utilities/platform-util/scripts/enroll-init-reconfigure b/utilities/platform-util/scripts/enroll-init-reconfigure
index 4fbb3546..eac4d62e 100755
--- a/utilities/platform-util/scripts/enroll-init-reconfigure
+++ b/utilities/platform-util/scripts/enroll-init-reconfigure
@@ -72,36 +72,36 @@ readonly EVENT_PLATFORM_CLOUDINIT_UPDATE_COMPLETE="platform_cloudinit_update_com
 readonly EVENT_PLATFORM_CLOUDINIT_UPDATE_FAILED="platform_cloudinit_update_failed"
 
 # IPMI payloads (data bytes)
-readonly DATA_FACTORY_SETUP_COMPLETE="0x04 0xF0 0x01 0x6f 0xff 0xff 0xe0 # \"Factory Setup Complete\""
-readonly DATA_FACTORY_SETUP_FAILED="0x04 0xF0 0x01 0x6f 0xff 0xff 0xe1 # \"Factory Setup Failed\""
+readonly DATA_FACTORY_SETUP_COMPLETE="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE0 # \"Factory Setup Complete\""
+readonly DATA_FACTORY_SETUP_FAILED="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE1 # \"Factory Setup Failed\""
 
-readonly DATA_APISERVER_CERT_OK="0x04 0xF0 0x01 0x6f 0xff 0xff 0xe2 # \"ApiServer Cert Valid\""
-readonly DATA_LEAF_CERTS_RENEW_FAIL="0x04 0xF0 0x01 0x6f 0xff 0xff 0xe3 # \"K8S Leaf Certs Renew Failed\""
-readonly DATA_RENEW_FAIL_PODS="0x04 0xF0 0x01 0x6f 0xff 0xff 0xe8 # \"Pods Cert Renew Failed\""
-readonly DATA_CERTMANAGER_CERTS_FAIL="0x04 0xF0 0x01 0x6f 0xff 0xff 0xef # \"Cert-manager Secrets Renew Failed\""
-readonly DATA_RENEW_FAIL_KUBECTL="0x04 0xF0 0x01 0x6f 0xff 0xff 0xf0 # \"Kubectl Cert Renew Failed\""
+readonly DATA_APISERVER_CERT_OK="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE2 # \"ApiServer Cert Valid\""
+readonly DATA_LEAF_CERTS_RENEW_FAIL="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE3 # \"K8S Leaf Certs Renew Failed\""
+readonly DATA_RENEW_FAIL_PODS="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE4 # \"Pods Cert Renew Failed\""
+readonly DATA_CERTMANAGER_CERTS_FAIL="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE5 # \"Cert-manager Secrets Renew Failed\""
+readonly DATA_RENEW_FAIL_KUBECTL="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE6 # \"Kubectl Cert Renew Failed\""
 
-readonly DATA_MANUAL_CAS_OK="0x04 0xF0 0x01 0x6f 0xff 0xff 0xe9 # \"Manual CA Certs Valid\""
-readonly DATA_MANUAL_CA_K8S_FP_FAIL="0x04 0xF0 0x01 0x6f 0xff 0xff 0xec # \"K8S/Front-proxy Cert Expired\""
-readonly DATA_MANUAL_CA_ETCD_FAIL="0x04 0xF0 0x01 0x6f 0xff 0xff 0xed # \"ETCD CA Cert Expired\""
-readonly DATA_MANUAL_CAS_BOTH_FAIL="0x04 0xF0 0x01 0x6f 0xff 0xff 0xee # \"ETCD CA and K8S/Front-proxy Certs Expired\""
+readonly DATA_MANUAL_CAS_OK="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE7 # \"Manual CA Certs Valid\""
+readonly DATA_MANUAL_CA_K8S_FP_FAIL="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE8 # \"K8S/Front-proxy Cert Expired\""
+readonly DATA_MANUAL_CA_ETCD_FAIL="0x04 0x12 0xCC 0x63 0xCC 0x10 0xE9 # \"ETCD CA Cert Expired\""
+readonly DATA_MANUAL_CAS_BOTH_FAIL="0x04 0x12 0xCC 0x63 0xCC 0x10 0xEA # \"ETCD CA and K8S/Front-proxy Certs Expired\""
 
-readonly DATA_SLOCAL_CA_OK="0x04 0xF0 0x01 0x6f 0xff 0xff 0xea # \"System-local-ca Cert Valid\""
-readonly DATA_SLOCAL_CA_FAIL="0x04 0xF0 0x01 0x6f 0xff 0xff 0xeb # \"System-local-ca Cert Expired\""
+readonly DATA_SLOCAL_CA_OK="0x04 0x12 0xCC 0x63 0xCC 0x10 0xEB # \"System-local-ca Cert Valid\""
+readonly DATA_SLOCAL_CA_FAIL="0x04 0x12 0xCC 0x63 0xCC 0x10 0xEC # \"System-local-ca Cert Expired\""
 
-readonly DATA_PLATFORM_CLOUDINIT_UPDATE_COMPLETE="0x04 0xF0 0x01 0x6f 0xff 0xff 0xe4 # \"Platform Cloud-init Update Complete\""
-readonly DATA_PLATFORM_CLOUDINIT_UPDATE_FAILED="0x04 0xF0 0x01 0x6f 0xff 0xff 0xe5 # \"Platform Cloud-init Update Failed\""
+readonly DATA_PLATFORM_CLOUDINIT_UPDATE_COMPLETE="0x04 0x12 0xCC 0x63 0xCC 0x10 0xED # \"Platform Cloud-init Update Complete\""
+readonly DATA_PLATFORM_CLOUDINIT_UPDATE_FAILED="0x04 0x12 0xCC 0x63 0xCC 0x10 0xEE # \"Platform Cloud-init Update Failed\""
 
 # IPMI SEL event format reference:
 # [EvM Revision] [Sensor Type] [Sensor Number] [Event Dir / Event Type Code]
 # [Event Data 1] [Event Data 2] [Event Data 3]
 #
-# Example: 0x04 0xF0 0x01 0x6f 0xff 0xff 0xe4
+# Example: 0x04 0x12 0xCC 0x63 0xCC 0x10 0xE0
 # 0x04 = EvM Revision (IPMI v2.0)
-# 0xF0 = Sensor Type (vendor-defined / OEM-specific)
-# 0x01 = Sensor Number (firmware-defined)
-# 0x6f = Event direction (vendor-specific encoding)
-# 0xff 0xff 0xe4 = Event Data bytes (3 bytes, OEM-specific payload)
+# 0x12 = Sensor Type ("System Event" in the IPMI sensor type table; not in the OEM range 0xC0-0xFF)
+# 0xCC = Sensor Number (firmware-defined)
+# 0x63 = Event Dir / Event Type Code (vendor-specific encoding)
+# 0xCC 0x10 0xE0 = Event Data bytes (3 bytes, OEM-specific payload)
 #
 # For our usage:
 # - The third byte (Sensor Number) is set to 0x01, corresponding to a sensor type "Unknown".
@@ -155,18 +155,6 @@ function send_ipmi_event {
     fi
 }
 
-function verify_factory_install {
-    log_info "Checking factory-install..."
-
-    if [ ! -f /var/lib/factory-install/stage/final ]; then
-        send_ipmi_event "$EVENT_FACTORY_SETUP_FAILED"
-        log_fatal "/var/lib/factory-install/stage/final does not exist. Ensure factory-install was successful."
-    fi
-
-    send_ipmi_event "$EVENT_FACTORY_SETUP_COMPLETE"
-    log_info "factory-install check successful."
-}
-
 # The enroll-init reconfigure script runs during startup via cloud-init while
 # system services may not be settled. This timing can lead to intermittent errors
 # for early system commands. This function is used to mitigate these problems by
@@ -639,11 +627,7 @@ while [[ "$#" -gt 0 ]]; do
     esac
 done
 
-# This script can only be run if the factory install is complete, so we check that first.
-# It's important that we fail due to an invalid factory install before any other
-# type of failure, as that's the IPMI SEL event the system controller monitors first.
 # Main execution flow
-verify_factory_install
 
 # Ensure all required arguments are provided
 if [ -z "$OAM_SUBNET" ] || [ -z "$OAM_GATEWAY_IP" ] || [ -z "$OAM_IP" ] || [ -z "$NEW_PASSWORD" ]; then