Prevent mariadb from split brain while cluster is in reboot state
The current start logic when the existing cluster state is reboot can lead to a split brain condition under certain circumstances. This patchset adds an additional step to ensure the cluster is set to the live state once the leader node is ready to start, instead of relying on slave nodes to handle it. It also adds a simple retry when a collision is detected while trying to write to the configmap. The existing hair-trigger that puts the cluster state from "live" into "reboot" could use some fine tuning, but updating it properly requires additional investigation and testing, and hence should be done as a separate activity outside the scope of this patchset. Change-Id: Ieb2861d6fbc435e24e20d13c7b358c751890b4c4
This commit is contained in:
parent
3435a3a82a
commit
5a8aabaee3
@ -15,7 +15,7 @@ apiVersion: v1
|
||||
appVersion: v10.2.31
|
||||
description: OpenStack-Helm MariaDB
|
||||
name: mariadb
|
||||
version: 0.2.0
|
||||
version: 0.2.1
|
||||
home: https://mariadb.com/kb/en/
|
||||
icon: http://badges.mariadb.org/mariadb-badge-180x60.png
|
||||
sources:
|
||||
|
@ -17,6 +17,7 @@ limitations under the License.
|
||||
import errno
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import select
|
||||
import signal
|
||||
import subprocess # nosec
|
||||
@ -58,6 +59,8 @@ kubernetes_version = kubernetes.client.VersionApi().get_code().git_version
|
||||
logger.info("Kubernetes API Version: {0}".format(kubernetes_version))
|
||||
k8s_api_instance = kubernetes.client.CoreV1Api()
|
||||
|
||||
# Setup secrets generator
|
||||
secretsGen = secrets.SystemRandom()
|
||||
|
||||
def check_env_var(env_var):
|
||||
"""Check if an env var exists.
|
||||
@ -325,26 +328,33 @@ def safe_update_configmap(configmap_dict, configmap_patch):
|
||||
# ensure nothing else has modified the configmap since we read it.
|
||||
configmap_patch['metadata']['resourceVersion'] = configmap_dict[
|
||||
'metadata']['resource_version']
|
||||
try:
|
||||
api_response = k8s_api_instance.patch_namespaced_config_map(
|
||||
name=state_configmap_name,
|
||||
namespace=pod_namespace,
|
||||
body=configmap_patch)
|
||||
return True
|
||||
except kubernetes.client.rest.ApiException as error:
|
||||
if error.status == 409:
|
||||
# This status code indicates a collision trying to write to the
|
||||
# config map while another instance is also trying the same.
|
||||
logger.warning("Collision writing configmap: {0}".format(error))
|
||||
# This often happens when the replicas were started at the same
|
||||
# time, and tends to be persistent. Sleep briefly to break the
|
||||
# synchronization.
|
||||
time.sleep(1)
|
||||
return True
|
||||
else:
|
||||
logger.error("Failed to set configmap: {0}".format(error))
|
||||
return error
|
||||
|
||||
# Retry up to 8 times in case of 409 only. Each retry has a ~1 second
|
||||
# sleep in between so do not want to exceed the roughly 10 second
|
||||
# write interval per cm update.
|
||||
for i in range(8):
|
||||
try:
|
||||
api_response = k8s_api_instance.patch_namespaced_config_map(
|
||||
name=state_configmap_name,
|
||||
namespace=pod_namespace,
|
||||
body=configmap_patch)
|
||||
return True
|
||||
except kubernetes.client.rest.ApiException as error:
|
||||
if error.status == 409:
|
||||
# This status code indicates a collision trying to write to the
|
||||
# config map while another instance is also trying the same.
|
||||
logger.warning("Collision writing configmap: {0}".format(error))
|
||||
# This often happens when the replicas were started at the same
|
||||
# time, and tends to be persistent. Sleep with some random
|
||||
# jitter value briefly to break the synchronization.
|
||||
naptime = secretsGen.uniform(0.8,1.2)
|
||||
time.sleep(naptime)
|
||||
else:
|
||||
logger.error("Failed to set configmap: {0}".format(error))
|
||||
return error
|
||||
logger.info("Retry writing configmap attempt={0} sleep={1}".format(
|
||||
i+1, naptime))
|
||||
return True
|
||||
|
||||
def set_configmap_annotation(key, value):
|
||||
"""Update a configmap's annotations via patching.
|
||||
@ -843,6 +853,14 @@ def run_mysqld(cluster='existing'):
|
||||
"This is a fresh node joining the cluster for the 1st time, not attempting to set admin passwords"
|
||||
)
|
||||
|
||||
# Node ready to start MariaDB, update cluster state to live and remove
|
||||
# reboot node info, if set previously.
|
||||
if cluster == 'new':
|
||||
set_configmap_annotation(
|
||||
key='openstackhelm.openstack.org/cluster.state', value='live')
|
||||
set_configmap_annotation(
|
||||
key='openstackhelm.openstack.org/reboot.node', value='')
|
||||
|
||||
logger.info("Launching MariaDB")
|
||||
run_cmd_with_logging(mysqld_cmd, logger)
|
||||
|
||||
|
@ -16,4 +16,5 @@ mariadb:
|
||||
- 0.1.13 Fix race condition for grastate.dat
|
||||
- 0.1.14 Update mysqld-exporter image to v0.12.1
|
||||
- 0.2.0 Uplift mariadb version and ubuntu release
|
||||
- 0.2.1 Prevent potential splitbrain issue if cluster is in reboot state
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user