Fix restore-in-progress check

When performing the B&R procedure with the wipe_ceph_osds
flag and the rook-ceph backend configured, an error was
raised while removing the app.

This happened because the app's lifecycle checked for a
restore in progress by querying the DB, and that check
always returned false, since the corresponding record had
not yet been inserted at that point of the procedure.

To fix this, the database query has been replaced by
checking the '/etc/platform/.restore_in_progress' flag.
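
In short, the check now reduces to a flag-file test; a minimal
sketch (the real code reads the path from
constants.RESTORE_IN_PROGRESS_FLAG, and the helper name here is
only illustrative):

    from os import path

    RESTORE_IN_PROGRESS_FLAG = '/etc/platform/.restore_in_progress'

    def restore_in_progress():
        # The flag file is already present while the platform restore
        # is running, unlike the DB record, which is only inserted
        # later in the procedure.
        return path.isfile(RESTORE_IN_PROGRESS_FLAG)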

Test Plan:
- PASS: Build rook-ceph app
- PASS: optimized AIO-SX B&R with wipe_ceph_osds flag
- PASS: legacy STD + DX B&R with wipe_ceph_osds flag

Partial-Bug: 2086473

Change-Id: Ica3befe51ff08a53eb1b33af12e96fa4358e6c0f
Signed-off-by: Erickson Silva de Oliveira <Erickson.SilvadeOliveira@windriver.com>

@@ -12,6 +12,7 @@
 import re
 import json
+from os import path
 from time import sleep
 from subprocess import run
 from string import Template
@@ -152,11 +153,8 @@ class RookCephAppLifecycleOperator(base.AppLifecycleOperator):
         :param app: AppOperator.Application object
         """
-        if (not self._verify_restore_in_progress(app_op._dbapi) or
-                cutils.is_aio_simplex_system(app_op._dbapi)):
-            LOG.info("Cleaning up the ceph cluster")
-            self.cluster_cleanup(app_op, context)
+        LOG.info("Cleaning up the ceph cluster")
+        self.cluster_cleanup(app_op, context)
 
         LOG.info("Removing ceph alarms")
         self.remove_alarms(app_op)
@@ -485,7 +483,7 @@ class RookCephAppLifecycleOperator(base.AppLifecycleOperator):
         # -------
         # Conditionally force clean the cluster in cleanup jobs not completed successfully
         # -------
-        if not is_jobs_completed:
+        if not is_jobs_completed and not path.isfile(constants.RESTORE_IN_PROGRESS_FLAG):
             LOG.info("Cleanup Jobs did not completed. Force removing finalizers and wiping OSDs")
             self.wipe_all_osds(app_op._dbapi, context)
             self.remove_resource_finalizers()
@@ -885,7 +883,7 @@ class RookCephAppLifecycleOperator(base.AppLifecycleOperator):
         dbapi = app_op._dbapi
 
         # Check if is being called by backup and restore process
-        if not self._verify_restore_in_progress(dbapi):
+        if not path.isfile(constants.RESTORE_IN_PROGRESS_FLAG):
             # CHECK AND FAIL: All hosts must be unlocked/enabled/{avaliable,degraded}
             hosts = self.get_hosts(dbapi, {}, only_rook=True)
             for host in hosts:
@@ -1816,16 +1814,6 @@ class RookCephAppLifecycleOperator(base.AppLifecycleOperator):
             namespace="rook-ceph"
         )
 
-    def _verify_restore_in_progress(self, dbapi):
-        """Check if restore is in progress"""
-        try:
-            dbapi.restore_get_one(
-                filters={'state': constants.RESTORE_STATE_IN_PROGRESS})
-        except exception.NotFound:
-            return False
-        else:
-            return True
-
     def create_job_to_rm_mon_data(self, hostname, mon_name):
         LOG.info("Creating job to remove mon-%s data from %s" % (mon_name, hostname))
         remove_mon_job_template = self.get_rm_mon_data_job_template()