RBD: Fix stats reporting
Current RBD code is incorrectly reporting the stats of the pool in the following ways:

- `provisioned_capacity_gb` contains the physical space used by Cinder-created volumes.
- `free_capacity_gb` does not take into account that pools can have quota restrictions, which should be used as the reference for the free capacity.
- `total_capacity` changes dynamically, which means that there is no way to have a fixed over-provisioning capacity.

This patch fixes the stats reporting, making sure we return the right values in `provisioned_capacity_gb` and `free_capacity_gb`, and allows a static calculation of the `total_capacity` through the `report_dynamic_total_capacity` configuration option.

We don't report `allocated_capacity_gb` because it is calculated by the Cinder core code and should not be reported by drivers, even if it's not currently working as expected [1][2].

[1] https://bugs.launchpad.net/cinder/+bug/1712549
[2] https://bugs.launchpad.net/cinder/+bug/1706057

Change-Id: I1e82bf9d0b6cc0fb1d1fc2dd8b8ccc59aea3f73f
Closes-Bug: #1706060
This commit is contained in:
parent
42746c68dd
commit
8469109016
@ -1121,19 +1121,13 @@ class RBDTestCase(test.TestCase):
|
|||||||
|
|
||||||
@ddt.data(True, False)
|
@ddt.data(True, False)
|
||||||
@common_mocks
|
@common_mocks
|
||||||
def test_update_volume_stats(self, replication_enabled):
|
@mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info')
|
||||||
client = self.mock_client.return_value
|
@mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats')
|
||||||
client.__enter__.return_value = client
|
def test_update_volume_stats(self, replication_enabled, stats_mock,
|
||||||
|
usage_mock):
|
||||||
client.cluster = mock.Mock()
|
stats_mock.return_value = (mock.sentinel.free_capacity_gb,
|
||||||
client.cluster.mon_command = mock.Mock()
|
mock.sentinel.total_capacity_gb)
|
||||||
client.cluster.mon_command.return_value = (
|
usage_mock.return_value = mock.sentinel.provisioned_capacity_gb
|
||||||
0, '{"stats":{"total_bytes":64385286144,'
|
|
||||||
'"total_used_bytes":3289628672,"total_avail_bytes":61095657472},'
|
|
||||||
'"pools":[{"name":"rbd","id":2,"stats":{"kb_used":1510197,'
|
|
||||||
'"bytes_used":1546440971,"max_avail":28987613184,"objects":412}},'
|
|
||||||
'{"name":"volumes","id":3,"stats":{"kb_used":0,"bytes_used":0,'
|
|
||||||
'"max_avail":28987613184,"objects":0}}]}\n', '')
|
|
||||||
|
|
||||||
expected = dict(
|
expected = dict(
|
||||||
volume_backend_name='RBD',
|
volume_backend_name='RBD',
|
||||||
@ -1141,11 +1135,11 @@ class RBDTestCase(test.TestCase):
|
|||||||
vendor_name='Open Source',
|
vendor_name='Open Source',
|
||||||
driver_version=self.driver.VERSION,
|
driver_version=self.driver.VERSION,
|
||||||
storage_protocol='ceph',
|
storage_protocol='ceph',
|
||||||
total_capacity_gb=28.44,
|
total_capacity_gb=mock.sentinel.total_capacity_gb,
|
||||||
free_capacity_gb=27.0,
|
free_capacity_gb=mock.sentinel.free_capacity_gb,
|
||||||
reserved_percentage=0,
|
reserved_percentage=0,
|
||||||
thin_provisioning_support=True,
|
thin_provisioning_support=True,
|
||||||
provisioned_capacity_gb=0.0,
|
provisioned_capacity_gb=mock.sentinel.provisioned_capacity_gb,
|
||||||
max_over_subscription_ratio=1.0,
|
max_over_subscription_ratio=1.0,
|
||||||
multiattach=False)
|
multiattach=False)
|
||||||
|
|
||||||
@ -1162,19 +1156,12 @@ class RBDTestCase(test.TestCase):
|
|||||||
mock_driver_configuration)
|
mock_driver_configuration)
|
||||||
|
|
||||||
actual = self.driver.get_volume_stats(True)
|
actual = self.driver.get_volume_stats(True)
|
||||||
client.cluster.mon_command.assert_called_once_with(
|
|
||||||
'{"prefix":"df", "format":"json"}', '')
|
|
||||||
self.assertDictEqual(expected, actual)
|
self.assertDictEqual(expected, actual)
|
||||||
|
|
||||||
@common_mocks
|
@common_mocks
|
||||||
def test_update_volume_stats_error(self):
|
@mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info')
|
||||||
client = self.mock_client.return_value
|
@mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats')
|
||||||
client.__enter__.return_value = client
|
def test_update_volume_stats_error(self, stats_mock, usage_mock):
|
||||||
|
|
||||||
client.cluster = mock.Mock()
|
|
||||||
client.cluster.mon_command = mock.Mock()
|
|
||||||
client.cluster.mon_command.return_value = (22, '', '')
|
|
||||||
|
|
||||||
self.mock_object(self.driver.configuration, 'safe_get',
|
self.mock_object(self.driver.configuration, 'safe_get',
|
||||||
mock_driver_configuration)
|
mock_driver_configuration)
|
||||||
|
|
||||||
@ -1187,15 +1174,66 @@ class RBDTestCase(test.TestCase):
|
|||||||
free_capacity_gb='unknown',
|
free_capacity_gb='unknown',
|
||||||
reserved_percentage=0,
|
reserved_percentage=0,
|
||||||
multiattach=False,
|
multiattach=False,
|
||||||
provisioned_capacity_gb=0.0,
|
provisioned_capacity_gb=0,
|
||||||
max_over_subscription_ratio=1.0,
|
max_over_subscription_ratio=1.0,
|
||||||
thin_provisioning_support=True)
|
thin_provisioning_support=True)
|
||||||
|
|
||||||
actual = self.driver.get_volume_stats(True)
|
actual = self.driver.get_volume_stats(True)
|
||||||
client.cluster.mon_command.assert_called_once_with(
|
|
||||||
'{"prefix":"df", "format":"json"}', '')
|
|
||||||
self.assertDictEqual(expected, actual)
|
self.assertDictEqual(expected, actual)
|
||||||
|
|
||||||
|
@ddt.data(
|
||||||
|
# Normal case, no quota and dynamic total
|
||||||
|
{'free_capacity': 27.0, 'total_capacity': 28.44},
|
||||||
|
# No quota and static total
|
||||||
|
{'dynamic_total': False,
|
||||||
|
'free_capacity': 27.0, 'total_capacity': 59.96},
|
||||||
|
# Quota and dynamic total
|
||||||
|
{'quota_max_bytes': 3221225472, 'max_avail': 1073741824,
|
||||||
|
'free_capacity': 1, 'total_capacity': 2.44},
|
||||||
|
# Quota and static total
|
||||||
|
{'quota_max_bytes': 3221225472, 'max_avail': 1073741824,
|
||||||
|
'dynamic_total': False,
|
||||||
|
'free_capacity': 1, 'total_capacity': 3.00},
|
||||||
|
# Quota and dynamic total when free would be negative
|
||||||
|
{'quota_max_bytes': 1073741824,
|
||||||
|
'free_capacity': 0, 'total_capacity': 1.44},
|
||||||
|
)
|
||||||
|
@ddt.unpack
|
||||||
|
@common_mocks
|
||||||
|
def test_get_pool(self, free_capacity, total_capacity,
|
||||||
|
max_avail=28987613184, quota_max_bytes=0,
|
||||||
|
dynamic_total=True):
|
||||||
|
client = self.mock_client.return_value
|
||||||
|
client.__enter__.return_value = client
|
||||||
|
client.cluster.mon_command.side_effect = [
|
||||||
|
(0, '{"stats":{"total_bytes":64385286144,'
|
||||||
|
'"total_used_bytes":3289628672,"total_avail_bytes":61095657472},'
|
||||||
|
'"pools":[{"name":"rbd","id":2,"stats":{"kb_used":1510197,'
|
||||||
|
'"bytes_used":1546440971,"max_avail":%s,"objects":412}},'
|
||||||
|
'{"name":"volumes","id":3,"stats":{"kb_used":0,"bytes_used":0,'
|
||||||
|
'"max_avail":28987613184,"objects":0}}]}\n' % max_avail, ''),
|
||||||
|
(0, '{"pool_name":"volumes","pool_id":4,"quota_max_objects":0,'
|
||||||
|
'"quota_max_bytes":%s}\n' % quota_max_bytes, ''),
|
||||||
|
]
|
||||||
|
with mock.patch.object(self.driver.configuration, 'safe_get',
|
||||||
|
return_value=dynamic_total):
|
||||||
|
result = self.driver._get_pool_stats()
|
||||||
|
client.cluster.mon_command.assert_has_calls([
|
||||||
|
mock.call('{"prefix":"df", "format":"json"}', ''),
|
||||||
|
mock.call('{"prefix":"osd pool get-quota", "pool": "rbd",'
|
||||||
|
' "format":"json"}', ''),
|
||||||
|
])
|
||||||
|
self.assertEqual((free_capacity, total_capacity), result)
|
||||||
|
|
||||||
|
@common_mocks
|
||||||
|
def test_get_pool_stats_failure(self):
|
||||||
|
client = self.mock_client.return_value
|
||||||
|
client.__enter__.return_value = client
|
||||||
|
client.cluster.mon_command.return_value = (-1, '', '')
|
||||||
|
|
||||||
|
result = self.driver._get_pool_stats()
|
||||||
|
self.assertEqual(('unknown', 'unknown'), result)
|
||||||
|
|
||||||
@common_mocks
|
@common_mocks
|
||||||
def test_get_mon_addrs(self):
|
def test_get_mon_addrs(self):
|
||||||
with mock.patch.object(self.driver, '_execute') as mock_execute:
|
with mock.patch.object(self.driver, '_execute') as mock_execute:
|
||||||
@ -1788,32 +1826,42 @@ class RBDTestCase(test.TestCase):
|
|||||||
self.assertEqual(RAISED_EXCEPTIONS,
|
self.assertEqual(RAISED_EXCEPTIONS,
|
||||||
[self.mock_rbd.ImageExists])
|
[self.mock_rbd.ImageExists])
|
||||||
|
|
||||||
@ddt.data({'image_size': [1, 1], 'total_usage': 2},
|
@mock.patch('cinder.volume.drivers.rbd.RBDVolumeProxy')
|
||||||
{'image_size': MockImageNotFoundException, 'total_usage': 0})
|
@mock.patch('cinder.volume.drivers.rbd.RADOSClient')
|
||||||
@ddt.unpack
|
@mock.patch('cinder.volume.drivers.rbd.RBDDriver.RBDProxy')
|
||||||
@mock.patch.object(driver, 'RADOSClient')
|
def test__get_usage_info(self, rbdproxy_mock, client_mock, volproxy_mock):
|
||||||
@mock.patch.object(driver, 'RBDVolumeProxy')
|
def FakeVolProxy(size):
|
||||||
def test__get_usage_info(self, volume_proxy, mock_rados_client,
|
if size == -1:
|
||||||
image_size, total_usage):
|
size_mock = mock.Mock(side_effect=MockImageNotFoundException)
|
||||||
class FakeRBDProxy(object):
|
else:
|
||||||
def list(self, ioctx):
|
size_mock = mock.Mock(return_value=size * units.Gi)
|
||||||
return ['volume-1', 'volume-2']
|
return mock.Mock(return_value=mock.Mock(size=size_mock))
|
||||||
|
|
||||||
def diff_iterate(offset, length, from_snapshot, iterate_cb):
|
volumes = ['volume-1', 'non-existent', 'non-cinder-volume']
|
||||||
self.driver._iterate_cb(offset, length, True)
|
|
||||||
|
|
||||||
self.driver._total_usage = 0
|
client = client_mock.return_value.__enter__.return_value
|
||||||
with mock.patch.object(self.driver, 'RBDProxy') as rbd_proxy:
|
rbdproxy_mock.return_value.list.return_value = volumes
|
||||||
with mock.patch.object(self.driver, 'rbd') as mock_rbd:
|
|
||||||
mock_rbd.ImageNotFound = MockImageNotFoundException
|
volproxy_mock.side_effect = [
|
||||||
proxy_list = mock.Mock()
|
mock.Mock(**{'__enter__': FakeVolProxy(1.0),
|
||||||
proxy_list.side_effect = ['volume-1', 'volume-2']
|
'__exit__': mock.Mock()}),
|
||||||
rbd_proxy.return_value = FakeRBDProxy()
|
mock.Mock(**{'__enter__': FakeVolProxy(-1),
|
||||||
image = volume_proxy.return_value.__enter__.return_value
|
'__exit__': mock.Mock()}),
|
||||||
image.size.side_effect = image_size
|
mock.Mock(**{'__enter__': FakeVolProxy(2.0),
|
||||||
image.diff_iterate.side_effect = diff_iterate
|
'__exit__': mock.Mock()})
|
||||||
self.driver._get_usage_info()
|
]
|
||||||
self.assertEqual(total_usage, self.driver._total_usage)
|
|
||||||
|
with mock.patch.object(self.driver, 'rbd') as mock_rbd:
|
||||||
|
mock_rbd.ImageNotFound = MockImageNotFoundException
|
||||||
|
total_provision = self.driver._get_usage_info()
|
||||||
|
|
||||||
|
rbdproxy_mock.return_value.list.assert_called_once_with(client.ioctx)
|
||||||
|
volproxy_mock.assert_has_calls([
|
||||||
|
mock.call(self.driver, volumes[0], read_only=True),
|
||||||
|
mock.call(self.driver, volumes[1], read_only=True),
|
||||||
|
])
|
||||||
|
|
||||||
|
self.assertEqual(3.00, total_provision)
|
||||||
|
|
||||||
|
|
||||||
class ManagedRBDTestCase(test_driver.BaseDriverTestCase):
|
class ManagedRBDTestCase(test_driver.BaseDriverTestCase):
|
||||||
|
@ -92,6 +92,11 @@ RBD_OPTS = [
|
|||||||
'ceph cluster to do a demotion/promotion of volumes. '
|
'ceph cluster to do a demotion/promotion of volumes. '
|
||||||
'If value < 0, no timeout is set and default librados '
|
'If value < 0, no timeout is set and default librados '
|
||||||
'value is used.'),
|
'value is used.'),
|
||||||
|
cfg.BoolOpt('report_dynamic_total_capacity', default=True,
|
||||||
|
help='Set to True for driver to report total capacity as a '
|
||||||
|
'dynamic value -used + current free- and to False to '
|
||||||
|
'report a static value -quota max bytes if defined and '
|
||||||
|
'global size of cluster if not-.'),
|
||||||
]
|
]
|
||||||
|
|
||||||
CONF = cfg.CONF
|
CONF = cfg.CONF
|
||||||
@ -360,22 +365,79 @@ class RBDDriver(driver.CloneableImageVD,
|
|||||||
ports.append(port)
|
ports.append(port)
|
||||||
return hosts, ports
|
return hosts, ports
|
||||||
|
|
||||||
def _iterate_cb(self, offset, length, exists):
|
|
||||||
if exists:
|
|
||||||
self._total_usage += length
|
|
||||||
|
|
||||||
def _get_usage_info(self):
|
def _get_usage_info(self):
|
||||||
|
"""Calculate provisioned volume space in GiB.
|
||||||
|
|
||||||
|
Stats report should send provisioned size of volumes (snapshot must not
|
||||||
|
be included) and not the physical size of those volumes.
|
||||||
|
|
||||||
|
We must include all volumes, not only Cinder created volumes, because
|
||||||
|
Cinder created volumes are reported by the Cinder core code as
|
||||||
|
allocated_capacity_gb.
|
||||||
|
"""
|
||||||
|
total_provisioned = 0
|
||||||
with RADOSClient(self) as client:
|
with RADOSClient(self) as client:
|
||||||
for t in self.RBDProxy().list(client.ioctx):
|
for t in self.RBDProxy().list(client.ioctx):
|
||||||
if t.startswith('volume'):
|
with RBDVolumeProxy(self, t, read_only=True) as v:
|
||||||
# Only check for "volume" to allow some flexibility with
|
|
||||||
# non-default volume_name_template settings. Template
|
|
||||||
# must start with "volume".
|
|
||||||
try:
|
try:
|
||||||
with RBDVolumeProxy(self, t, read_only=True) as v:
|
size = v.size()
|
||||||
v.diff_iterate(0, v.size(), None, self._iterate_cb)
|
|
||||||
except self.rbd.ImageNotFound:
|
except self.rbd.ImageNotFound:
|
||||||
LOG.debug("Image %s is not found.", t)
|
LOG.debug("Image %s is not found.", t)
|
||||||
|
else:
|
||||||
|
total_provisioned += size
|
||||||
|
|
||||||
|
total_provisioned = math.ceil(float(total_provisioned) / units.Gi)
|
||||||
|
return total_provisioned
|
||||||
|
|
||||||
|
def _get_pool_stats(self):
|
||||||
|
"""Gets pool free and total capacity in GiB.
|
||||||
|
|
||||||
|
Calculate free and total capacity of the pool based on the pool's
|
||||||
|
defined quota and pools stats.
|
||||||
|
|
||||||
|
Returns a tuple with (free, total) where they are either unknown or a
|
||||||
|
real number with a 2 digit precision.
|
||||||
|
"""
|
||||||
|
pool_name = self.configuration.rbd_pool
|
||||||
|
|
||||||
|
with RADOSClient(self) as client:
|
||||||
|
ret, df_outbuf, __ = client.cluster.mon_command(
|
||||||
|
'{"prefix":"df", "format":"json"}', '')
|
||||||
|
if ret:
|
||||||
|
LOG.warning('Unable to get rados pool stats.')
|
||||||
|
return 'unknown', 'unknown'
|
||||||
|
|
||||||
|
ret, quota_outbuf, __ = client.cluster.mon_command(
|
||||||
|
'{"prefix":"osd pool get-quota", "pool": "%s",'
|
||||||
|
' "format":"json"}' % pool_name, '')
|
||||||
|
if ret:
|
||||||
|
LOG.warning('Unable to get rados pool quotas.')
|
||||||
|
return 'unknown', 'unknown'
|
||||||
|
|
||||||
|
df_data = json.loads(df_outbuf)
|
||||||
|
pool_stats = [pool for pool in df_data['pools']
|
||||||
|
if pool['name'] == pool_name][0]['stats']
|
||||||
|
|
||||||
|
bytes_quota = json.loads(quota_outbuf)['quota_max_bytes']
|
||||||
|
# With quota the total is the quota limit and free is quota - used
|
||||||
|
if bytes_quota:
|
||||||
|
total_capacity = bytes_quota
|
||||||
|
free_capacity = max(min(total_capacity - pool_stats['bytes_used'],
|
||||||
|
pool_stats['max_avail']),
|
||||||
|
0)
|
||||||
|
# Without quota free is pools max available and total is global size
|
||||||
|
else:
|
||||||
|
total_capacity = df_data['stats']['total_bytes']
|
||||||
|
free_capacity = pool_stats['max_avail']
|
||||||
|
|
||||||
|
# If we want dynamic total capacity (default behavior)
|
||||||
|
if self.configuration.safe_get('report_dynamic_total_capacity'):
|
||||||
|
total_capacity = free_capacity + pool_stats['bytes_used']
|
||||||
|
|
||||||
|
free_capacity = round((float(free_capacity) / units.Gi), 2)
|
||||||
|
total_capacity = round((float(total_capacity) / units.Gi), 2)
|
||||||
|
|
||||||
|
return free_capacity, total_capacity
|
||||||
|
|
||||||
def _update_volume_stats(self):
|
def _update_volume_stats(self):
|
||||||
stats = {
|
stats = {
|
||||||
@ -401,27 +463,12 @@ class RBDDriver(driver.CloneableImageVD,
|
|||||||
stats['replication_targets'] = self._target_names
|
stats['replication_targets'] = self._target_names
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with RADOSClient(self) as client:
|
free_capacity, total_capacity = self._get_pool_stats()
|
||||||
ret, outbuf, _outs = client.cluster.mon_command(
|
stats['free_capacity_gb'] = free_capacity
|
||||||
'{"prefix":"df", "format":"json"}', '')
|
stats['total_capacity_gb'] = total_capacity
|
||||||
if ret != 0:
|
|
||||||
LOG.warning('Unable to get rados pool stats.')
|
|
||||||
else:
|
|
||||||
outbuf = json.loads(outbuf)
|
|
||||||
pool_stats = [pool for pool in outbuf['pools'] if
|
|
||||||
pool['name'] ==
|
|
||||||
self.configuration.rbd_pool][0]['stats']
|
|
||||||
stats['free_capacity_gb'] = round((float(
|
|
||||||
pool_stats['max_avail']) / units.Gi), 2)
|
|
||||||
used_capacity_gb = float(
|
|
||||||
pool_stats['bytes_used']) / units.Gi
|
|
||||||
stats['total_capacity_gb'] = round(
|
|
||||||
(stats['free_capacity_gb'] + used_capacity_gb), 2)
|
|
||||||
|
|
||||||
self._total_usage = 0
|
total_gbi = self._get_usage_info()
|
||||||
self._get_usage_info()
|
stats['provisioned_capacity_gb'] = total_gbi
|
||||||
total_usage_gb = math.ceil(float(self._total_usage) / units.Gi)
|
|
||||||
stats['provisioned_capacity_gb'] = total_usage_gb
|
|
||||||
except self.rados.Error:
|
except self.rados.Error:
|
||||||
# just log and return unknown capacities
|
# just log and return unknown capacities
|
||||||
LOG.exception('error refreshing volume stats')
|
LOG.exception('error refreshing volume stats')
|
||||||
|
18
releasenotes/notes/rbd-stats-report-0c7e803bb0b1aedb.yaml
Normal file
18
releasenotes/notes/rbd-stats-report-0c7e803bb0b1aedb.yaml
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
RBD driver supports returning a static total capacity value instead of a
|
||||||
|
dynamic value like it's been doing. Configurable with
|
||||||
|
`report_dynamic_total_capacity` configuration option.
|
||||||
|
upgrade:
|
||||||
|
- |
|
||||||
|
RBD/Ceph backends should adjust `max_over_subscription_ratio` to take into
|
||||||
|
account that the driver is no longer reporting volumes' physical usage but
|
||||||
|
its provisioned size.
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
RBD stats report has been fixed, now properly reports
|
||||||
|
`allocated_capacity_gb` and `provisioned_capacity_gb` with the sum of the
|
||||||
|
sizes of the volumes (not physical sizes) for volumes created by Cinder and
|
||||||
|
all available in the pool respectively. Free capacity will now properly
|
||||||
|
handle quota size restrictions of the pool.
|
Loading…
x
Reference in New Issue
Block a user