RBD: Fix stats reporting
Current RBD code is incorrectly reporting the stats of the pool in the following ways: - `provisioned_capacity_gb` contains physical space used by cinder created volumes. - `free_capacity_gb` is not taking into account that pools can have quota restrictions and they should be used as the reference for the free capacity. - `total_capacity` dynamically changes, which means that there is no way to have a fixed over provisioning capacity. This patch fixes the stats reporting making sure we return the right values in `provisioned_capacity_gb` and `free_capacity_gb`, and allows us to use a static calculation of the `total_capacity` using `report_dynamic_total_capacity` configuration option. We don't report `allocated_capacity_gb` because this is something that is calculated by the Cinder core code and should not be reported by drivers, even if it's not currently working as expected [1][2]. [1] https://bugs.launchpad.net/cinder/+bug/1712549 [2] https://bugs.launchpad.net/cinder/+bug/1706057 Change-Id: I1e82bf9d0b6cc0fb1d1fc2dd8b8ccc59aea3f73f Closes-Bug: #1706060
This commit is contained in:
parent
42746c68dd
commit
8469109016
@ -1121,19 +1121,13 @@ class RBDTestCase(test.TestCase):
|
||||
|
||||
@ddt.data(True, False)
|
||||
@common_mocks
|
||||
def test_update_volume_stats(self, replication_enabled):
|
||||
client = self.mock_client.return_value
|
||||
client.__enter__.return_value = client
|
||||
|
||||
client.cluster = mock.Mock()
|
||||
client.cluster.mon_command = mock.Mock()
|
||||
client.cluster.mon_command.return_value = (
|
||||
0, '{"stats":{"total_bytes":64385286144,'
|
||||
'"total_used_bytes":3289628672,"total_avail_bytes":61095657472},'
|
||||
'"pools":[{"name":"rbd","id":2,"stats":{"kb_used":1510197,'
|
||||
'"bytes_used":1546440971,"max_avail":28987613184,"objects":412}},'
|
||||
'{"name":"volumes","id":3,"stats":{"kb_used":0,"bytes_used":0,'
|
||||
'"max_avail":28987613184,"objects":0}}]}\n', '')
|
||||
@mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info')
|
||||
@mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats')
|
||||
def test_update_volume_stats(self, replication_enabled, stats_mock,
|
||||
usage_mock):
|
||||
stats_mock.return_value = (mock.sentinel.free_capacity_gb,
|
||||
mock.sentinel.total_capacity_gb)
|
||||
usage_mock.return_value = mock.sentinel.provisioned_capacity_gb
|
||||
|
||||
expected = dict(
|
||||
volume_backend_name='RBD',
|
||||
@ -1141,11 +1135,11 @@ class RBDTestCase(test.TestCase):
|
||||
vendor_name='Open Source',
|
||||
driver_version=self.driver.VERSION,
|
||||
storage_protocol='ceph',
|
||||
total_capacity_gb=28.44,
|
||||
free_capacity_gb=27.0,
|
||||
total_capacity_gb=mock.sentinel.total_capacity_gb,
|
||||
free_capacity_gb=mock.sentinel.free_capacity_gb,
|
||||
reserved_percentage=0,
|
||||
thin_provisioning_support=True,
|
||||
provisioned_capacity_gb=0.0,
|
||||
provisioned_capacity_gb=mock.sentinel.provisioned_capacity_gb,
|
||||
max_over_subscription_ratio=1.0,
|
||||
multiattach=False)
|
||||
|
||||
@ -1162,19 +1156,12 @@ class RBDTestCase(test.TestCase):
|
||||
mock_driver_configuration)
|
||||
|
||||
actual = self.driver.get_volume_stats(True)
|
||||
client.cluster.mon_command.assert_called_once_with(
|
||||
'{"prefix":"df", "format":"json"}', '')
|
||||
self.assertDictEqual(expected, actual)
|
||||
|
||||
@common_mocks
|
||||
def test_update_volume_stats_error(self):
|
||||
client = self.mock_client.return_value
|
||||
client.__enter__.return_value = client
|
||||
|
||||
client.cluster = mock.Mock()
|
||||
client.cluster.mon_command = mock.Mock()
|
||||
client.cluster.mon_command.return_value = (22, '', '')
|
||||
|
||||
@mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info')
|
||||
@mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats')
|
||||
def test_update_volume_stats_error(self, stats_mock, usage_mock):
|
||||
self.mock_object(self.driver.configuration, 'safe_get',
|
||||
mock_driver_configuration)
|
||||
|
||||
@ -1187,15 +1174,66 @@ class RBDTestCase(test.TestCase):
|
||||
free_capacity_gb='unknown',
|
||||
reserved_percentage=0,
|
||||
multiattach=False,
|
||||
provisioned_capacity_gb=0.0,
|
||||
provisioned_capacity_gb=0,
|
||||
max_over_subscription_ratio=1.0,
|
||||
thin_provisioning_support=True)
|
||||
|
||||
actual = self.driver.get_volume_stats(True)
|
||||
client.cluster.mon_command.assert_called_once_with(
|
||||
'{"prefix":"df", "format":"json"}', '')
|
||||
self.assertDictEqual(expected, actual)
|
||||
|
||||
@ddt.data(
    # Normal case, no quota and dynamic total
    {'free_capacity': 27.0, 'total_capacity': 28.44},
    # No quota and static total
    {'dynamic_total': False,
     'free_capacity': 27.0, 'total_capacity': 59.96},
    # Quota and dynamic total
    {'quota_max_bytes': 3221225472, 'max_avail': 1073741824,
     'free_capacity': 1, 'total_capacity': 2.44},
    # Quota and static total
    {'quota_max_bytes': 3221225472, 'max_avail': 1073741824,
     'dynamic_total': False,
     'free_capacity': 1, 'total_capacity': 3.00},
    # Quota and dynamic total when free would be negative
    {'quota_max_bytes': 1073741824,
     'free_capacity': 0, 'total_capacity': 1.44},
)
@ddt.unpack
@common_mocks
def test_get_pool(self, free_capacity, total_capacity,
                  max_avail=28987613184, quota_max_bytes=0,
                  dynamic_total=True):
    """Check _get_pool_stats free/total results for quota/total combos.

    Mocks the two Ceph mon commands the driver issues ("df" and
    "osd pool get-quota") and verifies the (free, total) GiB tuple for
    each ddt-supplied combination of quota and dynamic/static total.
    """
    client = self.mock_client.return_value
    client.__enter__.return_value = client
    # First response is for the "df" command, second for "get-quota".
    client.cluster.mon_command.side_effect = [
        (0, '{"stats":{"total_bytes":64385286144,'
            '"total_used_bytes":3289628672,"total_avail_bytes":61095657472},'
            '"pools":[{"name":"rbd","id":2,"stats":{"kb_used":1510197,'
            '"bytes_used":1546440971,"max_avail":%s,"objects":412}},'
            '{"name":"volumes","id":3,"stats":{"kb_used":0,"bytes_used":0,'
            '"max_avail":28987613184,"objects":0}}]}\n' % max_avail, ''),
        (0, '{"pool_name":"volumes","pool_id":4,"quota_max_objects":0,'
            '"quota_max_bytes":%s}\n' % quota_max_bytes, ''),
    ]

    # safe_get('report_dynamic_total_capacity') drives static vs dynamic.
    with mock.patch.object(self.driver.configuration, 'safe_get',
                           return_value=dynamic_total):
        result = self.driver._get_pool_stats()

    client.cluster.mon_command.assert_has_calls([
        mock.call('{"prefix":"df", "format":"json"}', ''),
        mock.call('{"prefix":"osd pool get-quota", "pool": "rbd",'
                  ' "format":"json"}', ''),
    ])
    self.assertEqual((free_capacity, total_capacity), result)
|
||||
|
||||
@common_mocks
def test_get_pool_stats_failure(self):
    """A non-zero mon_command return code yields ('unknown', 'unknown')."""
    client = self.mock_client.return_value
    client.__enter__.return_value = client
    # Any non-zero return code means the "df" query failed.
    client.cluster.mon_command.return_value = (-1, '', '')

    result = self.driver._get_pool_stats()
    self.assertEqual(('unknown', 'unknown'), result)
|
||||
|
||||
@common_mocks
|
||||
def test_get_mon_addrs(self):
|
||||
with mock.patch.object(self.driver, '_execute') as mock_execute:
|
||||
@ -1788,32 +1826,42 @@ class RBDTestCase(test.TestCase):
|
||||
self.assertEqual(RAISED_EXCEPTIONS,
|
||||
[self.mock_rbd.ImageExists])
|
||||
|
||||
@mock.patch('cinder.volume.drivers.rbd.RBDVolumeProxy')
@mock.patch('cinder.volume.drivers.rbd.RADOSClient')
@mock.patch('cinder.volume.drivers.rbd.RBDDriver.RBDProxy')
def test__get_usage_info(self, rbdproxy_mock, client_mock, volproxy_mock):
    """_get_usage_info sums provisioned sizes of all images in the pool.

    Missing images (ImageNotFound from size()) are skipped, and the
    total includes non-Cinder images as well: 1 GiB + 2 GiB -> 3.00.
    """
    def FakeVolProxy(size):
        # size == -1 simulates an image deleted after being listed.
        if size == -1:
            size_mock = mock.Mock(side_effect=MockImageNotFoundException)
        else:
            size_mock = mock.Mock(return_value=size * units.Gi)
        return mock.Mock(return_value=mock.Mock(size=size_mock))

    volumes = ['volume-1', 'non-existent', 'non-cinder-volume']

    client = client_mock.return_value.__enter__.return_value
    rbdproxy_mock.return_value.list.return_value = volumes

    # One context-manager mock per listed image, in order.
    volproxy_mock.side_effect = [
        mock.Mock(**{'__enter__': FakeVolProxy(1.0),
                     '__exit__': mock.Mock()}),
        mock.Mock(**{'__enter__': FakeVolProxy(-1),
                     '__exit__': mock.Mock()}),
        mock.Mock(**{'__enter__': FakeVolProxy(2.0),
                     '__exit__': mock.Mock()})
    ]

    # The driver catches self.rbd.ImageNotFound, so alias it to our fake.
    with mock.patch.object(self.driver, 'rbd') as mock_rbd:
        mock_rbd.ImageNotFound = MockImageNotFoundException
        total_provision = self.driver._get_usage_info()

    rbdproxy_mock.return_value.list.assert_called_once_with(client.ioctx)
    volproxy_mock.assert_has_calls([
        mock.call(self.driver, volumes[0], read_only=True),
        mock.call(self.driver, volumes[1], read_only=True),
    ])

    self.assertEqual(3.00, total_provision)
|
||||
|
||||
|
||||
class ManagedRBDTestCase(test_driver.BaseDriverTestCase):
|
||||
|
@ -92,6 +92,11 @@ RBD_OPTS = [
|
||||
'ceph cluster to do a demotion/promotion of volumes. '
|
||||
'If value < 0, no timeout is set and default librados '
|
||||
'value is used.'),
|
||||
cfg.BoolOpt('report_dynamic_total_capacity', default=True,
|
||||
help='Set to True for driver to report total capacity as a '
|
||||
'dynamic value -used + current free- and to False to '
|
||||
'report a static value -quota max bytes if defined and '
|
||||
'global size of cluster if not-.'),
|
||||
]
|
||||
|
||||
CONF = cfg.CONF
|
||||
@ -360,22 +365,79 @@ class RBDDriver(driver.CloneableImageVD,
|
||||
ports.append(port)
|
||||
return hosts, ports
|
||||
|
||||
def _iterate_cb(self, offset, length, exists):
|
||||
if exists:
|
||||
self._total_usage += length
|
||||
|
||||
def _get_usage_info(self):
    """Calculate provisioned volume space in GiB.

    Stats report should send provisioned size of volumes (snapshot must not
    be included) and not the physical size of those volumes.

    We must include all volumes, not only Cinder created volumes, because
    Cinder created volumes are reported by the Cinder core code as
    allocated_capacity_gb.

    :returns: total provisioned space in GiB, rounded up to a whole GiB.
    """
    total_provisioned = 0
    with RADOSClient(self) as client:
        for t in self.RBDProxy().list(client.ioctx):
            with RBDVolumeProxy(self, t, read_only=True) as v:
                try:
                    size = v.size()
                except self.rbd.ImageNotFound:
                    # Image may have been removed between list() and here;
                    # just skip it rather than failing the whole report.
                    LOG.debug("Image %s is not found.", t)
                else:
                    total_provisioned += size

    total_provisioned = math.ceil(float(total_provisioned) / units.Gi)
    return total_provisioned
|
||||
|
||||
def _get_pool_stats(self):
    """Gets pool free and total capacity in GiB.

    Calculate free and total capacity of the pool based on the pool's
    defined quota and pool stats.

    :returns: tuple (free, total) where each element is either the string
              'unknown' (on mon command failure) or a float with 2-digit
              precision in GiB.
    """
    pool_name = self.configuration.rbd_pool

    with RADOSClient(self) as client:
        ret, df_outbuf, __ = client.cluster.mon_command(
            '{"prefix":"df", "format":"json"}', '')
        if ret:
            LOG.warning('Unable to get rados pool stats.')
            return 'unknown', 'unknown'

        ret, quota_outbuf, __ = client.cluster.mon_command(
            '{"prefix":"osd pool get-quota", "pool": "%s",'
            ' "format":"json"}' % pool_name, '')
        if ret:
            LOG.warning('Unable to get rados pool quotas.')
            return 'unknown', 'unknown'

    df_data = json.loads(df_outbuf)
    pool_stats = [pool for pool in df_data['pools']
                  if pool['name'] == pool_name][0]['stats']

    bytes_quota = json.loads(quota_outbuf)['quota_max_bytes']
    # With quota the total is the quota limit and free is quota - used,
    # capped by the pool's real available space and floored at zero.
    if bytes_quota:
        total_capacity = bytes_quota
        free_capacity = max(min(total_capacity - pool_stats['bytes_used'],
                                pool_stats['max_avail']),
                            0)
    # Without quota free is pool's max available and total is global size
    else:
        total_capacity = df_data['stats']['total_bytes']
        free_capacity = pool_stats['max_avail']

    # If we want dynamic total capacity (default behavior)
    if self.configuration.safe_get('report_dynamic_total_capacity'):
        total_capacity = free_capacity + pool_stats['bytes_used']

    free_capacity = round((float(free_capacity) / units.Gi), 2)
    total_capacity = round((float(total_capacity) / units.Gi), 2)

    return free_capacity, total_capacity
|
||||
|
||||
def _update_volume_stats(self):
|
||||
stats = {
|
||||
@ -401,27 +463,12 @@ class RBDDriver(driver.CloneableImageVD,
|
||||
stats['replication_targets'] = self._target_names
|
||||
|
||||
try:
|
||||
with RADOSClient(self) as client:
|
||||
ret, outbuf, _outs = client.cluster.mon_command(
|
||||
'{"prefix":"df", "format":"json"}', '')
|
||||
if ret != 0:
|
||||
LOG.warning('Unable to get rados pool stats.')
|
||||
else:
|
||||
outbuf = json.loads(outbuf)
|
||||
pool_stats = [pool for pool in outbuf['pools'] if
|
||||
pool['name'] ==
|
||||
self.configuration.rbd_pool][0]['stats']
|
||||
stats['free_capacity_gb'] = round((float(
|
||||
pool_stats['max_avail']) / units.Gi), 2)
|
||||
used_capacity_gb = float(
|
||||
pool_stats['bytes_used']) / units.Gi
|
||||
stats['total_capacity_gb'] = round(
|
||||
(stats['free_capacity_gb'] + used_capacity_gb), 2)
|
||||
free_capacity, total_capacity = self._get_pool_stats()
|
||||
stats['free_capacity_gb'] = free_capacity
|
||||
stats['total_capacity_gb'] = total_capacity
|
||||
|
||||
self._total_usage = 0
|
||||
self._get_usage_info()
|
||||
total_usage_gb = math.ceil(float(self._total_usage) / units.Gi)
|
||||
stats['provisioned_capacity_gb'] = total_usage_gb
|
||||
total_gbi = self._get_usage_info()
|
||||
stats['provisioned_capacity_gb'] = total_gbi
|
||||
except self.rados.Error:
|
||||
# just log and return unknown capacities
|
||||
LOG.exception('error refreshing volume stats')
|
||||
|
18
releasenotes/notes/rbd-stats-report-0c7e803bb0b1aedb.yaml
Normal file
18
releasenotes/notes/rbd-stats-report-0c7e803bb0b1aedb.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
---
features:
  - |
    RBD driver supports returning a static total capacity value instead of a
    dynamic value like it's been doing. Configurable with
    `report_dynamic_total_capacity` configuration option.
upgrade:
  - |
    RBD/Ceph backends should adjust `max_over_subscription_ratio` to take
    into account that the driver is no longer reporting the volumes'
    physical usage but their provisioned size.
fixes:
  - |
    RBD stats report has been fixed, now properly reports
    `allocated_capacity_gb` and `provisioned_capacity_gb` with the sum of
    the sizes of the volumes (not physical sizes) for volumes created by
    Cinder and all available in the pool respectively. Free capacity will
    now properly handle quota size restrictions of the pool.
|
Loading…
Reference in New Issue
Block a user