Merge "Restart the ceph-mgr daemon every 7 days to control RSS memory growth"
This commit is contained in:
commit
d1ba1d9e80
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
#
|
#
|
||||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
# Copyright (c) 2019-2023 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
@ -32,6 +32,7 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
import daemon
|
import daemon
|
||||||
import psutil
|
import psutil
|
||||||
@ -141,6 +142,10 @@ class Config(object):
|
|||||||
# restful plugin recovers
|
# restful plugin recovers
|
||||||
self.ping_fail_count_report_error = 5
|
self.ping_fail_count_report_error = 5
|
||||||
|
|
||||||
|
# Restart ceph-mgr after this many days of uptime to keep its RSS
|
||||||
|
# memory growth under control (-1 disables the periodic restart)
|
||||||
|
self.ceph_mgr_lifecycle_days = 7
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load():
|
def load():
|
||||||
return Config()
|
return Config()
|
||||||
@ -277,6 +282,9 @@ class ServiceMonitor(object):
|
|||||||
# ceph-mgr process
|
# ceph-mgr process
|
||||||
self.ceph_mgr = None
|
self.ceph_mgr = None
|
||||||
|
|
||||||
|
# date and time the ceph-mgr process was last started (None if unknown)
|
||||||
|
self.ceph_mgr_start_date = None
|
||||||
|
|
||||||
# consecutive ceph-mgr/restful-plugin start failures. Service monitor
|
# consecutive ceph-mgr/restful-plugin start failures. Service monitor
|
||||||
# reports failure after CONFIG.ceph_mgr_max_failure_count
|
# reports failure after CONFIG.ceph_mgr_max_failure_count
|
||||||
self.ceph_mgr_failure_count = 0
|
self.ceph_mgr_failure_count = 0
|
||||||
@ -570,6 +578,12 @@ class ServiceMonitor(object):
|
|||||||
# REST API should be available now
|
# REST API should be available now
|
||||||
# start making periodic requests (ping)
|
# start making periodic requests (ping)
|
||||||
while True:
|
while True:
|
||||||
|
if self.ceph_mgr_lifecycle_days != -1 \
|
||||||
|
and self.ceph_mgr_uptime() >= self.ceph_mgr_lifecycle_days:
|
||||||
|
self.ceph_mgr_start_date = None
|
||||||
|
LOG.info("Restarting ceph-mgr to control RSS memory growth")
|
||||||
|
self.ceph_mgr_restart()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.restful_plugin_ping()
|
self.restful_plugin_ping()
|
||||||
self.ping_failure_count = 0
|
self.ping_failure_count = 0
|
||||||
@ -710,6 +724,7 @@ class ServiceMonitor(object):
|
|||||||
stdout=null,
|
stdout=null,
|
||||||
stderr=null,
|
stderr=null,
|
||||||
shell=False)
|
shell=False)
|
||||||
|
self.ceph_mgr_start_date = datetime.now()
|
||||||
except (OSError, ValueError) as err:
|
except (OSError, ValueError) as err:
|
||||||
raise CephMgrStartFailed(reason=str(err))
|
raise CephMgrStartFailed(reason=str(err))
|
||||||
time.sleep(CONFIG.ceph_mgr_grace_period_sec)
|
time.sleep(CONFIG.ceph_mgr_grace_period_sec)
|
||||||
@ -720,6 +735,15 @@ class ServiceMonitor(object):
|
|||||||
LOG.info('Stop ceph-mgr')
|
LOG.info('Stop ceph-mgr')
|
||||||
psutil_terminate_kill(self.ceph_mgr, CONFIG.ceph_mgr_kill_delay_sec)
|
psutil_terminate_kill(self.ceph_mgr, CONFIG.ceph_mgr_kill_delay_sec)
|
||||||
|
|
||||||
|
def ceph_mgr_restart(self):
    """Restart ceph-mgr by stopping it and then starting it again.

    Calls ``ceph_mgr_stop`` followed by ``ceph_mgr_start``; anything
    either of them raises propagates to the caller.
    """
    self.ceph_mgr_stop()
    self.ceph_mgr_start()
|
||||||
|
|
||||||
|
def ceph_mgr_uptime(self):
    """Return the number of whole days ceph-mgr has been running.

    Returns 0 when no start date is recorded (``ceph_mgr_start_date`` is
    None). Also returns 0 when the recorded start lies in the future,
    e.g. after the wall clock was stepped backwards, so callers never
    see a negative uptime.
    """
    started = self.ceph_mgr_start_date
    if started is None:
        return 0
    # timedelta.days floors towards negative infinity, so a start date
    # even slightly in the future would yield -1; clamp it to 0.
    return max(0, (datetime.now() - started).days)
|
||||||
|
|
||||||
def restful_plugin_has_server_port(self):
|
def restful_plugin_has_server_port(self):
|
||||||
try:
|
try:
|
||||||
with open(os.devnull, 'wb') as null:
|
with open(os.devnull, 'wb') as null:
|
||||||
|
Loading…
Reference in New Issue
Block a user