From c167159c4a95506f5c72671c9e8d6d59869666cc Mon Sep 17 00:00:00 2001
From: Hirotaka Wakabayashi
Date: Fri, 15 Jul 2022 18:36:06 +0900
Subject: [PATCH] Fixes the way to check db instance status

This patch changes the way to check instance status. Since Victoria,
Trove has changed the db instance status name from `RUNNING` to
`HEALTHY`[1].

Original problem:
Some clustering databases like Apache Cassandra fail to update cluster
status because they check db instance status using `RUNNING`.

[1]: https://opendev.org/openstack/trove/commit/a0a10f0b947c63ac06787b490afd0ebecef1477e

Story: 2010147
Task: 45791
Change-Id: Iaa032fb46ed51b6e416e7d4efdfd272924ba146b
---
 ...atabase-status-check-61fa0f49dd786c72.yaml |  6 +++
 .../experimental/cassandra/taskmanager.py     |  2 +-
 trove/taskmanager/models.py                   | 11 ++++
 .../unittests/taskmanager/test_clusters.py    | 54 +++++++++++++++++++
 4 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 releasenotes/notes/fix-clustering-database-status-check-61fa0f49dd786c72.yaml

diff --git a/releasenotes/notes/fix-clustering-database-status-check-61fa0f49dd786c72.yaml b/releasenotes/notes/fix-clustering-database-status-check-61fa0f49dd786c72.yaml
new file mode 100644
index 0000000000..d1ff2c6ba9
--- /dev/null
+++ b/releasenotes/notes/fix-clustering-database-status-check-61fa0f49dd786c72.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    Fix the way to check instance status. Since Victoria, Trove has changed
+    the database instance status name from `RUNNING` to `HEALTHY`, but
+    some clustering databases like Apache Cassandra still used `RUNNING`.
diff --git a/trove/common/strategies/cluster/experimental/cassandra/taskmanager.py b/trove/common/strategies/cluster/experimental/cassandra/taskmanager.py
index f74c74a935..d867dddd10 100644
--- a/trove/common/strategies/cluster/experimental/cassandra/taskmanager.py
+++ b/trove/common/strategies/cluster/experimental/cassandra/taskmanager.py
@@ -238,7 +238,7 @@ class CassandraClusterTasks(task_models.ClusterTasks):
                     node['guest'].node_cleanup()
                     LOG.debug("Waiting for node to finish its "
                               "cleanup: %s", nid)
-                    if not self._all_instances_running([nid], cluster_id):
+                    if not self._all_instances_healthy([nid], cluster_id):
                         LOG.warning("Node did not complete cleanup "
                                     "successfully: %s", nid)
diff --git a/trove/taskmanager/models.py b/trove/taskmanager/models.py
index 53eac6eff7..c76166a793 100755
--- a/trove/taskmanager/models.py
+++ b/trove/taskmanager/models.py
@@ -230,6 +230,17 @@ class ClusterTasks(Cluster):
             ]
         )
 
+    def _all_instances_healthy(self, instance_ids, cluster_id, shard_id=None):
+        """Wait for all instances to become HEALTHY."""
+        return self._all_instances_acquire_status(
+            instance_ids, cluster_id, shard_id,
+            srvstatus.ServiceStatuses.HEALTHY,
+            fast_fail_statuses=[
+                srvstatus.ServiceStatuses.FAILED,
+                srvstatus.ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT
+            ]
+        )
+
     def _all_instances_acquire_status(
             self, instance_ids, cluster_id, shard_id, expected_status,
             fast_fail_statuses=None):
diff --git a/trove/tests/unittests/taskmanager/test_clusters.py b/trove/tests/unittests/taskmanager/test_clusters.py
index ab881fde1d..451d72aef9 100644
--- a/trove/tests/unittests/taskmanager/test_clusters.py
+++ b/trove/tests/unittests/taskmanager/test_clusters.py
@@ -34,6 +34,60 @@ from trove.instance.service_status import ServiceStatuses
 from trove.tests.unittests import trove_testtools
 
 
+class CassandraClusterTasksTest(trove_testtools.TestCase):
+    def setUp(self):
+        super(CassandraClusterTasksTest, self).setUp()
+        self.cluster_id = "1234"
+        self.cluster_name = "test1"
+        self.tenant_id = "2345"
+        self.db_cluster = DBCluster(ClusterTaskStatus.NONE,
+                                    id=self.cluster_id,
+                                    created=str(datetime.date),
+                                    updated=str(datetime.date),
+                                    name=self.cluster_name,
+                                    task_id=ClusterTaskStatus.NONE._code,
+                                    tenant_id=self.tenant_id,
+                                    datastore_version_id="1",
+                                    deleted=False)
+        self.dbinst1 = DBInstance(InstanceTasks.NONE, id="1", name="member1",
+                                  compute_instance_id="compute-1",
+                                  task_id=InstanceTasks.NONE._code,
+                                  task_description=InstanceTasks.NONE._db_text,
+                                  volume_id="volume-1",
+                                  datastore_version_id="1",
+                                  cluster_id=self.cluster_id,
+                                  shard_id="shard-1",
+                                  type="member")
+        self.dbinst2 = DBInstance(InstanceTasks.NONE, id="2", name="member2",
+                                  compute_instance_id="compute-2",
+                                  task_id=InstanceTasks.NONE._code,
+                                  task_description=InstanceTasks.NONE._db_text,
+                                  volume_id="volume-2",
+                                  datastore_version_id="1",
+                                  cluster_id=self.cluster_id,
+                                  shard_id="shard-1",
+                                  type="member")
+        mock_ds1 = Mock()
+        mock_ds1.name = 'cassandra'
+        mock_dv1 = Mock()
+        mock_dv1.name = '4.0.0'
+        self.clustertasks = ClusterTasks(Mock(),
+                                         self.db_cluster,
+                                         datastore=mock_ds1,
+                                         datastore_version=mock_dv1)
+
+    @patch.object(DBInstance, 'find_by')
+    @patch.object(InstanceServiceStatus, 'find_by')
+    def test_all_instances_healthy(self, mock_find, mock_db_find):
+        (mock_find.return_value.
+         get_status.return_value) = ServiceStatuses.HEALTHY
+        (mock_db_find.return_value.
+         get_task_status.return_value) = InstanceTasks.NONE
+        ret_val = self.clustertasks._all_instances_healthy(["1", "2"],
+                                                           self.cluster_id)
+        self.assertTrue(ret_val)
+
+
 class MongoDbClusterTasksTest(trove_testtools.TestCase):
     def setUp(self):
         super(MongoDbClusterTasksTest, self).setUp()