From 6542a9cb0458da8e6e5e045bb29dc24b318b276f Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Wed, 16 Sep 2020 13:10:13 -0700 Subject: [PATCH] Don't run os-prober from grub2-mkconfig By default, grub2-mkconfig scans everything to look for other environments and then loads those into the grub configuration. It makes sense, but on newer versions of grub2 in distribution images, os-prober is taking an exceptionally long time in some cases where more than one storage device exists with other filesystems. As a result of the os-prober execution by grub2-mkconfig, the bootloader installation can completely time out and fail the deployment. This is presently experienced with metalsmith on CentOS 8. There are numerous sporadic reports of issues like this one, where grub2-mkconfig hangs for some period of time, and this is observable on CentOS 8.2 in our CI. While one report [0] mentions this issue, another bug [1] has the dialog that actually helps us frame the context as to what we likely should do. Also fixes the unit tests so we actually test if we're running with grub2. 
:\ [0]: https://bugzilla.redhat.com/show_bug.cgi?id=1744693 [1]: https://bugzilla.redhat.com/show_bug.cgi?id=1709682 Depends-On: https://review.opendev.org/#/c/748315 Change-Id: I14bf299afef3a1ddb2006fe5f182d7f0d249e734 --- ironic_python_agent/extensions/image.py | 6 ++--- .../tests/unit/extensions/test_image.py | 27 +++++++++++++------ ...-grub2-mkconfig-hang-fe22cde231994044.yaml | 11 ++++++++ zuul.d/ironic-python-agent-jobs.yaml | 14 ++++++++++ zuul.d/project.yaml | 2 ++ 5 files changed, 49 insertions(+), 11 deletions(-) create mode 100644 releasenotes/notes/fixes-centos-fedora-grub2-mkconfig-hang-fe22cde231994044.yaml diff --git a/ironic_python_agent/extensions/image.py b/ironic_python_agent/extensions/image.py index dd1eedd20..f5050df87 100644 --- a/ironic_python_agent/extensions/image.py +++ b/ironic_python_agent/extensions/image.py @@ -658,13 +658,13 @@ def _install_grub2(device, root_uuid, efi_system_part_uuid=None, r'GRUB_CMDLINE_LINUX="\1 %s"' % " ".join(rd_md_uuids), contents)) - - # Generate the grub configuration file utils.execute('chroot %(path)s /bin/sh -c ' '"%(bin)s-mkconfig -o ' '/boot/%(bin)s/grub.cfg"' % {'path': path, 'bin': binary_name}, shell=True, - env_variables={'PATH': path_variable}) + env_variables={'PATH': path_variable, + 'GRUB_DISABLE_OS_PROBER': 'true'}, + use_standard_locale=True) LOG.info("GRUB2 successfully installed on %s", device) diff --git a/ironic_python_agent/tests/unit/extensions/test_image.py b/ironic_python_agent/tests/unit/extensions/test_image.py index 0200df5cc..636d56ad3 100644 --- a/ironic_python_agent/tests/unit/extensions/test_image.py +++ b/ironic_python_agent/tests/unit/extensions/test_image.py @@ -381,6 +381,7 @@ efibootmgr: ** Warning ** : Boot0005 has same label ironic1\n expected = [mock.call('efibootmgr', '--version')] mock_execute.assert_has_calls(expected) + @mock.patch.object(os.path, 'exists', lambda *_: True) @mock.patch.object(image, '_is_bootloader_loaded', lambda *_: False) 
@mock.patch.object(hardware, 'is_md_device', autospec=True) @mock.patch.object(hardware, 'md_get_raid_devices', autospec=True) @@ -409,16 +410,18 @@ efibootmgr: ** Warning ** : Boot0005 has same label ironic1\n env_variables={ 'PATH': '/sbin:/bin:/usr/sbin:/sbin'}), mock.call(('chroot %s /bin/sh -c ' - '"grub-install %s"' % + '"grub2-install %s"' % (self.fake_dir, self.fake_dev)), shell=True, env_variables={ 'PATH': '/sbin:/bin:/usr/sbin:/sbin'}), mock.call(('chroot %s /bin/sh -c ' - '"grub-mkconfig -o ' - '/boot/grub/grub.cfg"' % self.fake_dir), + '"grub2-mkconfig -o ' + '/boot/grub2/grub.cfg"' % self.fake_dir), shell=True, env_variables={ - 'PATH': '/sbin:/bin:/usr/sbin:/sbin'}), + 'PATH': '/sbin:/bin:/usr/sbin:/sbin', + 'GRUB_DISABLE_OS_PROBER': 'true'}, + use_standard_locale=True), mock.call(('chroot %s /bin/sh -c "umount -a -t vfat"' % (self.fake_dir)), shell=True, env_variables={ @@ -478,7 +481,9 @@ efibootmgr: ** Warning ** : Boot0005 has same label ironic1\n '/boot/grub/grub.cfg"' % self.fake_dir), shell=True, env_variables={ - 'PATH': '/sbin:/bin:/usr/sbin:/sbin'}), + 'PATH': '/sbin:/bin:/usr/sbin:/sbin', + 'GRUB_DISABLE_OS_PROBER': 'true'}, + use_standard_locale=True), mock.call(('chroot %s /bin/sh -c "umount -a -t vfat"' % (self.fake_dir)), shell=True, env_variables={ @@ -556,7 +561,9 @@ efibootmgr: ** Warning ** : Boot0005 has same label ironic1\n '/boot/grub/grub.cfg"' % self.fake_dir), shell=True, env_variables={ - 'PATH': '/sbin:/bin:/usr/sbin:/sbin'}), + 'PATH': '/sbin:/bin:/usr/sbin:/sbin', + 'GRUB_DISABLE_OS_PROBER': 'true'}, + use_standard_locale=True), mock.call('umount', self.fake_dir + '/boot/efi', attempts=3, delay_on_retry=True), mock.call(('chroot %s /bin/sh -c "umount -a -t vfat"' % @@ -963,7 +970,9 @@ efibootmgr: ** Warning ** : Boot0005 has same label ironic1\n '/boot/grub/grub.cfg"' % self.fake_dir), shell=True, env_variables={ - 'PATH': '/sbin:/bin:/usr/sbin:/sbin'}), + 'PATH': '/sbin:/bin:/usr/sbin:/sbin', + 'GRUB_DISABLE_OS_PROBER': 
'true'}, + use_standard_locale=True), mock.call('umount', self.fake_dir + '/boot/efi', attempts=3, delay_on_retry=True), mock.call(('chroot %s /bin/sh -c "umount -a -t vfat"' % @@ -1055,7 +1064,9 @@ efibootmgr: ** Warning ** : Boot0005 has same label ironic1\n '/boot/grub/grub.cfg"' % self.fake_dir), shell=True, env_variables={ - 'PATH': '/sbin:/bin:/usr/sbin:/sbin'}), + 'PATH': '/sbin:/bin:/usr/sbin:/sbin', + 'GRUB_DISABLE_OS_PROBER': 'true'}, + use_standard_locale=True), mock.call(('chroot %s /bin/sh -c "umount -a -t vfat"' % (self.fake_dir)), shell=True, env_variables={ diff --git a/releasenotes/notes/fixes-centos-fedora-grub2-mkconfig-hang-fe22cde231994044.yaml b/releasenotes/notes/fixes-centos-fedora-grub2-mkconfig-hang-fe22cde231994044.yaml new file mode 100644 index 000000000..9d0056867 --- /dev/null +++ b/releasenotes/notes/fixes-centos-fedora-grub2-mkconfig-hang-fe22cde231994044.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + Fixes an issue where deployments of Fedora or Centos can hang when + using grub2 with the execution of the ``grub2-mkconfig`` command + not returning before the deployment process times out. This is + because ``grub2-mkconfig`` triggers ``os-prober`` which can + take an extended period of time to evaluate additional unrelated + devices for dual-boot scenarios. Since operators are not dual + booting their machines enrolled in ironic, it seems like an + un-necessary scan and has thus been disabled. 
diff --git a/zuul.d/ironic-python-agent-jobs.yaml b/zuul.d/ironic-python-agent-jobs.yaml index a7754e084..bba9b34a3 100644 --- a/zuul.d/ironic-python-agent-jobs.yaml +++ b/zuul.d/ironic-python-agent-jobs.yaml @@ -154,6 +154,20 @@ IRONIC_AUTOMATED_CLEAN_ENABLED: False IRONIC_BUILD_DEPLOY_RAMDISK: True +# This job will superceed the above centos7 metalsmith integration job +- job: + name: metalsmith-integration-ipa-src-uefi + parent: metalsmith-integration-glance-localboot-centos8-uefi + required-projects: + - openstack/ironic-python-agent + - openstack/ironic-python-agent-builder + - openstack/ironic-lib + vars: + devstack_localrc: + # Don't waste time on cleaning, it's checked everywhere else + IRONIC_AUTOMATED_CLEAN_ENABLED: False + IRONIC_BUILD_DEPLOY_RAMDISK: True + # used by ironic-python-agent-builder # we can use focal here but we need to disable dstat because of # https://bugs.launchpad.net/ubuntu/+source/dstat/+bug/1866619 diff --git a/zuul.d/project.yaml b/zuul.d/project.yaml index 3d9811970..062537481 100644 --- a/zuul.d/project.yaml +++ b/zuul.d/project.yaml @@ -16,6 +16,8 @@ - ipa-tempest-bios-ipmi-iscsi-src - ironic-standalone-ipa-src - metalsmith-integration-ipa-src + - metalsmith-integration-ipa-src-uefi: + voting: false # NOTE(dtantsur): non-voting because IPA source code is very unlikely # to break them. They rather serve as a canary for broken POST jobs. - ironic-python-agent-check-image-tinyipa: