diff --git a/hooks/hooks.py b/hooks/hooks.py index 5cc7b9f3..7db0e240 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -255,6 +255,12 @@ def mon_relation(rid=None, unit=None): if systemd_based_radosgw(): service_stop('radosgw') service('disable', 'radosgw') + # Update the nrpe config. If we wait for the below + # to be called elsewhere, there exists a period + # where nagios will report the radosgw service as + # down, and also not be monitoring the per + # host services. + update_nrpe_config(checks_to_remove=['radosgw']) service('enable', service_name()) # NOTE(jamespage): @@ -362,13 +368,28 @@ def ha_relation_changed(): @hooks.hook('nrpe-external-master-relation-joined', 'nrpe-external-master-relation-changed') -def update_nrpe_config(): +def update_nrpe_config(checks_to_remove=None): + """ + Update the checks for the nagios plugin. + + :param checks_to_remove: list of short names of nrpe checks to + remove. For example, pass ['radosgw'] to remove the check for + the default systemd radosgw service, to make way for per host + services. + :type checks_to_remove: list + + """ # python-dbus is used by check_upstart_job apt_install('python-dbus') hostname = nrpe.get_nagios_hostname() current_unit = nrpe.get_nagios_unit_name() nrpe_setup = nrpe.NRPE(hostname=hostname) nrpe.copy_nrpe_checks() + if checks_to_remove is not None: + log("Removing the following nrpe checks: {}".format(checks_to_remove), + level=DEBUG) + for svc in checks_to_remove: + nrpe_setup.remove_check(shortname=svc) nrpe.add_init_service_checks(nrpe_setup, services(), current_unit) nrpe.add_haproxy_checks(nrpe_setup, current_unit) nrpe_setup.write() diff --git a/unit_tests/test_hooks.py b/unit_tests/test_hooks.py index 3051572b..cc046b83 100644 --- a/unit_tests/test_hooks.py +++ b/unit_tests/test_hooks.py @@ -671,3 +671,34 @@ class SlaveMultisiteTests(CephRadosMultisiteTests): self.is_leader.return_value = False ceph_hooks.slave_relation_changed('slave:1', 'rgw/0') self.relation_get.assert_not_called() + + @patch.object(ceph_hooks, 'apt_install') + @patch.object(ceph_hooks, 'services') + @patch.object(ceph_hooks, 'nrpe') + def test_update_nrpe_config(self, nrpe, services, apt_install): + # Setup Mocks + nrpe.get_nagios_hostname.return_value = 'foo' + nrpe.get_nagios_unit_name.return_value = 'bar' + nrpe_setup = MagicMock() + nrpe.NRPE.return_value = nrpe_setup + services.return_value = ['baz', 'qux'] + + # Call the routine + ceph_hooks.update_nrpe_config() + + # Verify calls + apt_install.assert_called() + nrpe.get_nagios_hostname.assert_called() + nrpe.get_nagios_unit_name.assert_called() + nrpe.copy_nrpe_checks.assert_called() + nrpe.remove_check.assert_not_called() + nrpe.add_init_service_checks.assert_called_with(nrpe_setup, + ['baz', 'qux'], 'bar') + nrpe.add_haproxy_checks.assert_called_with(nrpe_setup, 'bar') + nrpe_setup.write.assert_called() + + # Verify that remove_check is called appropriately if we pass + # checks_to_remove + ceph_hooks.update_nrpe_config(checks_to_remove=['quux', 'quuux']) + nrpe_setup.remove_check.assert_has_calls([call(shortname='quux'), + call(shortname='quuux')])