From 2d47d4157c69e9de84fa3dec9f58bfd82bca6007 Mon Sep 17 00:00:00 2001
From: Ian Wienand <iwienand@redhat.com>
Date: Tue, 23 Nov 2021 16:30:50 +1100
Subject: [PATCH] Fix BLS based bootloader installation

This reverts I2701260d54cf6bc79f1ac765b512d99d799e8c43,
Idf2a471453c5490d927979fb97aa916418172153 and part of
Iecf7f7e4c992bb23437b6461cdd04cdca96aafa6 which added special flags to
update kernels via grubby.

These changes actually ended up reverting the behaviour on Fedora 35,
which is what led me to investigate what was going on more fully.

All distros still support setting GRUB_DEVICE in /etc/default/grub,
even the BLS-based ones (i.e. everything except centos7).

The implementation *is* confusing -- in earlier distros each BLS entry
would refer to the variable $kernelopts, which grub2-mkconfig would
write into /boot/grub2/grubenv.  After commit [1] this was reverted,
and the kernel options are now written directly into the BLS entry.

But the real problem is this bit from [2]:

 get_sorted_bls()
 {
     if ! [ -d "${blsdir}" ] || ! [ -e /etc/machine-id ]; then
        return
     fi
     ...
     files=($(for bls in ${blsdir}/${machine_id}-*.conf; do
     ...
 }

i.e., to avoid overwriting BLS entries for other OS-boots (?),
grub2-mkconfig will only update those BLS entries that match the
current machine-id.

The problem for DIB is that we are clearing the machine-id early in
finalise.d/01-clear-machine-id, but then running the bootloader update
later in finalise.d/50-bootloader.

The result is that the bootloader entry generated when we installed
the kernel (which guessed at the root= device, etc.) is *not* updated.
Even more annoyingly, the gate doesn't pick this up: the gate tests
run on a DIB image that was itself booted with
"root=LABEL=cloudimg-rootfs", so the entry initially created by
"install-kernel" (which we never updated) happens to be correct.  But
this fails when built on a production host.

Thus we don't need any of the explicit grubby updates; these are
reverted here.  This moves the machine-id clearing to after the
bootloader setup, which allows grub2-mkconfig to set up the BLS entries
correctly.

[1] https://src.fedoraproject.org/rpms/grub2/c/4a742183a39f344a7685bccdc76d5e64dea3766a?branch=master
[2] https://src.fedoraproject.org/rpms/grub2/blob/rawhide/f/0062-Add-BLS-support-to-grub-mkconfig.patch

Depends-On: https://review.opendev.org/c/zuul/nodepool/+/818705
Change-Id: Ia0e49980eb50eae29a5377d24ef0b31e4d78d346
---
 .../bootloader/finalise.d/50-bootloader       | 33 ++++---------------
 ...1-clear-machine-id => 99-clear-machine-id} |  0
 2 files changed, 7 insertions(+), 26 deletions(-)
 rename diskimage_builder/elements/sysprep/finalise.d/{01-clear-machine-id => 99-clear-machine-id} (100%)

diff --git a/diskimage_builder/elements/bootloader/finalise.d/50-bootloader b/diskimage_builder/elements/bootloader/finalise.d/50-bootloader
index 433a2c677..1a7512e44 100755
--- a/diskimage_builder/elements/bootloader/finalise.d/50-bootloader
+++ b/diskimage_builder/elements/bootloader/finalise.d/50-bootloader
@@ -157,27 +157,7 @@ else
     fi
 fi
 
-# Fedora 30 and RHEL-8.2 onwards support the Bootloader Spec and use grubby
-# to manage kernel menu entries and kernel arguments.
-# https://fedoraproject.org/wiki/Changes/BootLoaderSpecByDefault
-USE_GRUBBY=
-if grep -qe "^\s*GRUB_ENABLE_BLSCFG=true" /etc/default/grub; then
-    USE_GRUBBY=true
-fi
-
-# When building CentOS9 with centos-minimal /etc/default/grub does not exist
-# after grub2-tools installation. However we need CS9 to use grubby.
-if [[ "$DISTRO_NAME" == "centos" ]] && [[ $DIB_RELEASE =~ 9 ]]; then
-    USE_GRUBBY=true
-fi
-
-# Override the root device to the default label, and disable uuid
-# lookup.
-if [ -n "$USE_GRUBBY" ]; then
-    grubby --update-kernel=ALL --args="root=LABEL=${DIB_ROOT_LABEL}"
-else
-    echo "GRUB_DEVICE=LABEL=${DIB_ROOT_LABEL}" >> /etc/default/grub
-fi
+echo "GRUB_DEVICE=LABEL=${DIB_ROOT_LABEL}" >> /etc/default/grub
 echo 'GRUB_DISABLE_LINUX_UUID=true' >> /etc/default/grub
 echo "GRUB_TIMEOUT=${DIB_GRUB_TIMEOUT:-5}" >>/etc/default/grub
 echo 'GRUB_TERMINAL="serial console"' >>/etc/default/grub
@@ -195,11 +175,7 @@ else
 fi
 
 GRUB_CMDLINE_LINUX_DEFAULT="console=tty0 console=${SERIAL_CONSOLE} no_timer_check"
-if [ -n "$USE_GRUBBY" ]; then
-    grubby --update-kernel=ALL --args="${GRUB_CMDLINE_LINUX_DEFAULT} ${DIB_BOOTLOADER_DEFAULT_CMDLINE}"
-else
-    echo "GRUB_CMDLINE_LINUX_DEFAULT=\"${GRUB_CMDLINE_LINUX_DEFAULT} ${DIB_BOOTLOADER_DEFAULT_CMDLINE}\"" >>/etc/default/grub
-fi
+echo "GRUB_CMDLINE_LINUX_DEFAULT=\"${GRUB_CMDLINE_LINUX_DEFAULT} ${DIB_BOOTLOADER_DEFAULT_CMDLINE}\"" >>/etc/default/grub
 echo 'GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=1"' >>/etc/default/grub
 
 # os-prober leaks /dev/sda into config file in dual-boot host
@@ -221,6 +197,11 @@ fi
 # support uefi specific functionality like secure boot.
 $GRUB_MKCONFIG -o $GRUB_CFG
 
+# If we are using BLS, dump out the kernel boot entries for debugging purposes
+if [[ -e /boot/loader/entries ]]; then
+    grubby --info=ALL
+fi
+
 # Remove the fix to disable os_prober
 if [ -n "$PROBER_DISABLED" ]; then
     sed -i '$d' /etc/default/grub
diff --git a/diskimage_builder/elements/sysprep/finalise.d/01-clear-machine-id b/diskimage_builder/elements/sysprep/finalise.d/99-clear-machine-id
similarity index 100%
rename from diskimage_builder/elements/sysprep/finalise.d/01-clear-machine-id
rename to diskimage_builder/elements/sysprep/finalise.d/99-clear-machine-id