From 8cd982be5cdcba4825a925c648b08e76957b0044 Mon Sep 17 00:00:00 2001 From: "wu.chunyang" Date: Tue, 1 Aug 2023 07:30:52 +0000 Subject: [PATCH] Fix postgresql database creation failures from prepare func PostgreSQL needs to restart during bootstrap, but the postgres driver still reports a healthy status. This commit resets healthy_count when the status is not healthy, and makes state_healthy_counts configurable. Change-Id: I746b86326790dbc667f4f0d6dabcd1a656502273 --- ...stgresql-database-create-failed-abd4f99cc7dde44c.yaml | 4 ++++ trove/common/cfg.py | 2 ++ trove/guestagent/datastore/service.py | 9 +++++---- 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/fix-postgresql-database-create-failed-abd4f99cc7dde44c.yaml diff --git a/releasenotes/notes/fix-postgresql-database-create-failed-abd4f99cc7dde44c.yaml b/releasenotes/notes/fix-postgresql-database-create-failed-abd4f99cc7dde44c.yaml new file mode 100644 index 0000000000..864cdb58bc --- /dev/null +++ b/releasenotes/notes/fix-postgresql-database-create-failed-abd4f99cc7dde44c.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Fix potential PostgreSQL database creation failures from the instance create API \ No newline at end of file diff --git a/trove/common/cfg.py b/trove/common/cfg.py index a717a71c6b..141d6b84d1 100644 --- a/trove/common/cfg.py +++ b/trove/common/cfg.py @@ -201,6 +201,8 @@ common_opts = [ 'change.'), cfg.IntOpt('state_change_poll_time', default=3, help='Interval between state change poll requests (seconds).'), + cfg.IntOpt('state_healthy_counts', default=5, + help='consecutive success db connections for status HEALTHY'), cfg.IntOpt('agent_heartbeat_time', default=10, help='Maximum time (in seconds) for the Guest Agent to reply ' 'to a heartbeat request.'), diff --git a/trove/guestagent/datastore/service.py b/trove/guestagent/datastore/service.py index 92d75604b9..afa4d234de 100644 --- a/trove/guestagent/datastore/service.py +++ 
b/trove/guestagent/datastore/service.py @@ -290,15 +290,15 @@ class BaseDbStatus(object): # outside. loop = True - # We need 3 (by default) consecutive success db connections for status + # We need 5 (by default) consecutive success db connections for status # 'HEALTHY' healthy_count = 0 - + state_healthy_counts = CONF.state_healthy_counts - 1 while loop: self.status = self.get_actual_db_status() if self.status == status: if (status == service_status.ServiceStatuses.HEALTHY and - healthy_count < 2): + healthy_count < state_healthy_counts): healthy_count += 1 time.sleep(CONF.state_change_poll_time) continue @@ -310,7 +310,8 @@ class BaseDbStatus(object): # should we remain in this loop? this is the thing # that emulates the do-while construct. loop = (time.time() < end_time) - + # reset the healthy_count + healthy_count = 0 # no point waiting if our time is up and we're # just going to error out anyway. if loop: