RBD: Fix stats reporting

Current RBD code is incorrectly reporting the stats of the pool in the
following ways:

- `provisioned_capacity_gb` contains physical space used by cinder created
  volumes.
- `free_capacity_gb` is not taking into account that pools can have quota
  restrictions, and that those quotas should be used as the reference for the
  free capacity.
- `total_capacity` dynamically changes, which means that there is no way to
  have a fixed over provisioning capacity.

This patch fixes the stats reporting, making sure we return the right
values in `provisioned_capacity_gb` and `free_capacity_gb`, and allows
us to use a static calculation of the `total_capacity` via the
`report_dynamic_total_capacity` configuration option.

We don't report `allocated_capacity_gb` because this is something that
is calculated by the Cinder core code and should not be reported by
drivers, even if it's not currently working as expected [1][2].

[1] https://bugs.launchpad.net/cinder/+bug/1712549
[2] https://bugs.launchpad.net/cinder/+bug/1706057

Change-Id: I1e82bf9d0b6cc0fb1d1fc2dd8b8ccc59aea3f73f
Closes-Bug: #1706060
This commit is contained in:
Gorka Eguileor 2017-08-03 18:50:12 +02:00
parent 42746c68dd
commit 8469109016
3 changed files with 196 additions and 83 deletions

View File

@ -1121,19 +1121,13 @@ class RBDTestCase(test.TestCase):
@ddt.data(True, False) @ddt.data(True, False)
@common_mocks @common_mocks
def test_update_volume_stats(self, replication_enabled): @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info')
client = self.mock_client.return_value @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats')
client.__enter__.return_value = client def test_update_volume_stats(self, replication_enabled, stats_mock,
usage_mock):
client.cluster = mock.Mock() stats_mock.return_value = (mock.sentinel.free_capacity_gb,
client.cluster.mon_command = mock.Mock() mock.sentinel.total_capacity_gb)
client.cluster.mon_command.return_value = ( usage_mock.return_value = mock.sentinel.provisioned_capacity_gb
0, '{"stats":{"total_bytes":64385286144,'
'"total_used_bytes":3289628672,"total_avail_bytes":61095657472},'
'"pools":[{"name":"rbd","id":2,"stats":{"kb_used":1510197,'
'"bytes_used":1546440971,"max_avail":28987613184,"objects":412}},'
'{"name":"volumes","id":3,"stats":{"kb_used":0,"bytes_used":0,'
'"max_avail":28987613184,"objects":0}}]}\n', '')
expected = dict( expected = dict(
volume_backend_name='RBD', volume_backend_name='RBD',
@ -1141,11 +1135,11 @@ class RBDTestCase(test.TestCase):
vendor_name='Open Source', vendor_name='Open Source',
driver_version=self.driver.VERSION, driver_version=self.driver.VERSION,
storage_protocol='ceph', storage_protocol='ceph',
total_capacity_gb=28.44, total_capacity_gb=mock.sentinel.total_capacity_gb,
free_capacity_gb=27.0, free_capacity_gb=mock.sentinel.free_capacity_gb,
reserved_percentage=0, reserved_percentage=0,
thin_provisioning_support=True, thin_provisioning_support=True,
provisioned_capacity_gb=0.0, provisioned_capacity_gb=mock.sentinel.provisioned_capacity_gb,
max_over_subscription_ratio=1.0, max_over_subscription_ratio=1.0,
multiattach=False) multiattach=False)
@ -1162,19 +1156,12 @@ class RBDTestCase(test.TestCase):
mock_driver_configuration) mock_driver_configuration)
actual = self.driver.get_volume_stats(True) actual = self.driver.get_volume_stats(True)
client.cluster.mon_command.assert_called_once_with(
'{"prefix":"df", "format":"json"}', '')
self.assertDictEqual(expected, actual) self.assertDictEqual(expected, actual)
@common_mocks @common_mocks
def test_update_volume_stats_error(self): @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info')
client = self.mock_client.return_value @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats')
client.__enter__.return_value = client def test_update_volume_stats_error(self, stats_mock, usage_mock):
client.cluster = mock.Mock()
client.cluster.mon_command = mock.Mock()
client.cluster.mon_command.return_value = (22, '', '')
self.mock_object(self.driver.configuration, 'safe_get', self.mock_object(self.driver.configuration, 'safe_get',
mock_driver_configuration) mock_driver_configuration)
@ -1187,15 +1174,66 @@ class RBDTestCase(test.TestCase):
free_capacity_gb='unknown', free_capacity_gb='unknown',
reserved_percentage=0, reserved_percentage=0,
multiattach=False, multiattach=False,
provisioned_capacity_gb=0.0, provisioned_capacity_gb=0,
max_over_subscription_ratio=1.0, max_over_subscription_ratio=1.0,
thin_provisioning_support=True) thin_provisioning_support=True)
actual = self.driver.get_volume_stats(True) actual = self.driver.get_volume_stats(True)
client.cluster.mon_command.assert_called_once_with(
'{"prefix":"df", "format":"json"}', '')
self.assertDictEqual(expected, actual) self.assertDictEqual(expected, actual)
@ddt.data(
    # No quota, dynamic total (default configuration)
    {'free_capacity': 27.0, 'total_capacity': 28.44},
    # No quota, static total
    {'dynamic_total': False,
     'free_capacity': 27.0, 'total_capacity': 59.96},
    # Quota set, dynamic total
    {'quota_max_bytes': 3221225472, 'max_avail': 1073741824,
     'free_capacity': 1, 'total_capacity': 2.44},
    # Quota set, static total
    {'quota_max_bytes': 3221225472, 'max_avail': 1073741824,
     'dynamic_total': False,
     'free_capacity': 1, 'total_capacity': 3.00},
    # Quota smaller than the used space: free is clamped to zero
    {'quota_max_bytes': 1073741824,
     'free_capacity': 0, 'total_capacity': 1.44},
)
@ddt.unpack
@common_mocks
def test_get_pool(self, free_capacity, total_capacity,
                  max_avail=28987613184, quota_max_bytes=0,
                  dynamic_total=True):
    # Canned monitor replies: first the "df" output (with the "rbd" pool's
    # max_avail parametrized), then the pool quota output.
    df_reply = (
        '{"stats":{"total_bytes":64385286144,'
        '"total_used_bytes":3289628672,"total_avail_bytes":61095657472},'
        '"pools":[{"name":"rbd","id":2,"stats":{"kb_used":1510197,'
        '"bytes_used":1546440971,"max_avail":%s,"objects":412}},'
        '{"name":"volumes","id":3,"stats":{"kb_used":0,"bytes_used":0,'
        '"max_avail":28987613184,"objects":0}}]}\n' % max_avail)
    quota_reply = (
        '{"pool_name":"volumes","pool_id":4,"quota_max_objects":0,'
        '"quota_max_bytes":%s}\n' % quota_max_bytes)

    rados_client = self.mock_client.return_value
    rados_client.__enter__.return_value = rados_client
    mon_command = rados_client.cluster.mon_command
    mon_command.side_effect = [
        (0, df_reply, ''),
        (0, quota_reply, ''),
    ]

    # safe_get is only patched while the driver computes the stats, so it
    # answers the report_dynamic_total_capacity lookup with dynamic_total.
    with mock.patch.object(self.driver.configuration, 'safe_get',
                           return_value=dynamic_total):
        result = self.driver._get_pool_stats()

    expected_calls = [
        mock.call('{"prefix":"df", "format":"json"}', ''),
        mock.call('{"prefix":"osd pool get-quota", "pool": "rbd",'
                  ' "format":"json"}', ''),
    ]
    mon_command.assert_has_calls(expected_calls)
    self.assertEqual((free_capacity, total_capacity), result)
@common_mocks
def test_get_pool_stats_failure(self):
    # A non-zero status from the monitor command must be reported as
    # unknown free and total capacities.
    rados_client = self.mock_client.return_value
    rados_client.__enter__.return_value = rados_client
    rados_client.cluster.mon_command.return_value = (-1, '', '')

    self.assertEqual(('unknown', 'unknown'),
                     self.driver._get_pool_stats())
@common_mocks @common_mocks
def test_get_mon_addrs(self): def test_get_mon_addrs(self):
with mock.patch.object(self.driver, '_execute') as mock_execute: with mock.patch.object(self.driver, '_execute') as mock_execute:
@ -1788,32 +1826,42 @@ class RBDTestCase(test.TestCase):
self.assertEqual(RAISED_EXCEPTIONS, self.assertEqual(RAISED_EXCEPTIONS,
[self.mock_rbd.ImageExists]) [self.mock_rbd.ImageExists])
@ddt.data({'image_size': [1, 1], 'total_usage': 2}, @mock.patch('cinder.volume.drivers.rbd.RBDVolumeProxy')
{'image_size': MockImageNotFoundException, 'total_usage': 0}) @mock.patch('cinder.volume.drivers.rbd.RADOSClient')
@ddt.unpack @mock.patch('cinder.volume.drivers.rbd.RBDDriver.RBDProxy')
@mock.patch.object(driver, 'RADOSClient') def test__get_usage_info(self, rbdproxy_mock, client_mock, volproxy_mock):
@mock.patch.object(driver, 'RBDVolumeProxy') def FakeVolProxy(size):
def test__get_usage_info(self, volume_proxy, mock_rados_client, if size == -1:
image_size, total_usage): size_mock = mock.Mock(side_effect=MockImageNotFoundException)
class FakeRBDProxy(object): else:
def list(self, ioctx): size_mock = mock.Mock(return_value=size * units.Gi)
return ['volume-1', 'volume-2'] return mock.Mock(return_value=mock.Mock(size=size_mock))
def diff_iterate(offset, length, from_snapshot, iterate_cb): volumes = ['volume-1', 'non-existent', 'non-cinder-volume']
self.driver._iterate_cb(offset, length, True)
self.driver._total_usage = 0 client = client_mock.return_value.__enter__.return_value
with mock.patch.object(self.driver, 'RBDProxy') as rbd_proxy: rbdproxy_mock.return_value.list.return_value = volumes
with mock.patch.object(self.driver, 'rbd') as mock_rbd:
mock_rbd.ImageNotFound = MockImageNotFoundException volproxy_mock.side_effect = [
proxy_list = mock.Mock() mock.Mock(**{'__enter__': FakeVolProxy(1.0),
proxy_list.side_effect = ['volume-1', 'volume-2'] '__exit__': mock.Mock()}),
rbd_proxy.return_value = FakeRBDProxy() mock.Mock(**{'__enter__': FakeVolProxy(-1),
image = volume_proxy.return_value.__enter__.return_value '__exit__': mock.Mock()}),
image.size.side_effect = image_size mock.Mock(**{'__enter__': FakeVolProxy(2.0),
image.diff_iterate.side_effect = diff_iterate '__exit__': mock.Mock()})
self.driver._get_usage_info() ]
self.assertEqual(total_usage, self.driver._total_usage)
with mock.patch.object(self.driver, 'rbd') as mock_rbd:
mock_rbd.ImageNotFound = MockImageNotFoundException
total_provision = self.driver._get_usage_info()
rbdproxy_mock.return_value.list.assert_called_once_with(client.ioctx)
volproxy_mock.assert_has_calls([
mock.call(self.driver, volumes[0], read_only=True),
mock.call(self.driver, volumes[1], read_only=True),
])
self.assertEqual(3.00, total_provision)
class ManagedRBDTestCase(test_driver.BaseDriverTestCase): class ManagedRBDTestCase(test_driver.BaseDriverTestCase):

View File

@ -92,6 +92,11 @@ RBD_OPTS = [
'ceph cluster to do a demotion/promotion of volumes. ' 'ceph cluster to do a demotion/promotion of volumes. '
'If value < 0, no timeout is set and default librados ' 'If value < 0, no timeout is set and default librados '
'value is used.'), 'value is used.'),
cfg.BoolOpt('report_dynamic_total_capacity', default=True,
help='Set to True for driver to report total capacity as a '
'dynamic value -used + current free- and to False to '
'report a static value -quota max bytes if defined and '
'global size of cluster if not-.'),
] ]
CONF = cfg.CONF CONF = cfg.CONF
@ -360,22 +365,79 @@ class RBDDriver(driver.CloneableImageVD,
ports.append(port) ports.append(port)
return hosts, ports return hosts, ports
def _iterate_cb(self, offset, length, exists):
if exists:
self._total_usage += length
def _get_usage_info(self): def _get_usage_info(self):
"""Calculate provisioned volume space in GiB.
Stats report should send provisioned size of volumes (snapshot must not
be included) and not the physical size of those volumes.
We must include all volumes, not only Cinder created volumes, because
Cinder created volumes are reported by the Cinder core code as
allocated_capacity_gb.
"""
total_provisioned = 0
with RADOSClient(self) as client: with RADOSClient(self) as client:
for t in self.RBDProxy().list(client.ioctx): for t in self.RBDProxy().list(client.ioctx):
if t.startswith('volume'): with RBDVolumeProxy(self, t, read_only=True) as v:
# Only check for "volume" to allow some flexibility with
# non-default volume_name_template settings. Template
# must start with "volume".
try: try:
with RBDVolumeProxy(self, t, read_only=True) as v: size = v.size()
v.diff_iterate(0, v.size(), None, self._iterate_cb)
except self.rbd.ImageNotFound: except self.rbd.ImageNotFound:
LOG.debug("Image %s is not found.", t) LOG.debug("Image %s is not found.", t)
else:
total_provisioned += size
total_provisioned = math.ceil(float(total_provisioned) / units.Gi)
return total_provisioned
def _get_pool_stats(self):
    """Return the configured pool's free and total capacity in GiB.

    Two monitor commands are issued: ``df`` for the cluster and per-pool
    usage, and ``osd pool get-quota`` for the pool's byte quota.

    With a byte quota the total is the quota and free space is whatever is
    left of the quota, capped by the pool's ``max_avail`` and never below
    zero.  Without a quota, free space is the pool's ``max_avail`` and the
    total is the cluster's ``total_bytes``.  When the
    ``report_dynamic_total_capacity`` option is set, the total is replaced
    by free space plus the bytes used by the pool.

    :returns: tuple ``(free, total)``; both values are rounded to two
              decimals, or both are ``'unknown'`` when either monitor
              command returns a non-zero status.
    """
    rbd_pool = self.configuration.rbd_pool
    unknown = ('unknown', 'unknown')

    with RADOSClient(self) as client:
        status, df_json, __ = client.cluster.mon_command(
            '{"prefix":"df", "format":"json"}', '')
        if status:
            LOG.warning('Unable to get rados pool stats.')
            return unknown

        status, quota_json, __ = client.cluster.mon_command(
            '{"prefix":"osd pool get-quota", "pool": "%s",'
            ' "format":"json"}' % rbd_pool, '')
        if status:
            LOG.warning('Unable to get rados pool quotas.')
            return unknown

    df_data = json.loads(df_json)
    # The first entry whose name matches the configured pool is used; an
    # IndexError is raised if the pool is not listed at all.
    matching_pools = [entry for entry in df_data['pools']
                      if entry['name'] == rbd_pool]
    pool_stats = matching_pools[0]['stats']
    quota_bytes = json.loads(quota_json)['quota_max_bytes']

    if not quota_bytes:
        # No quota: total is the global cluster size, free is what the
        # pool can still take.
        total_bytes = df_data['stats']['total_bytes']
        free_bytes = pool_stats['max_avail']
    else:
        # Quota: total is the quota itself, free is the unused part of the
        # quota bounded by the pool's real availability.
        total_bytes = quota_bytes
        quota_left = total_bytes - pool_stats['bytes_used']
        free_bytes = max(min(quota_left, pool_stats['max_avail']), 0)

    # Dynamic total (default behavior) overrides the static value above.
    if self.configuration.safe_get('report_dynamic_total_capacity'):
        total_bytes = free_bytes + pool_stats['bytes_used']

    return (round((float(free_bytes) / units.Gi), 2),
            round((float(total_bytes) / units.Gi), 2))
def _update_volume_stats(self): def _update_volume_stats(self):
stats = { stats = {
@ -401,27 +463,12 @@ class RBDDriver(driver.CloneableImageVD,
stats['replication_targets'] = self._target_names stats['replication_targets'] = self._target_names
try: try:
with RADOSClient(self) as client: free_capacity, total_capacity = self._get_pool_stats()
ret, outbuf, _outs = client.cluster.mon_command( stats['free_capacity_gb'] = free_capacity
'{"prefix":"df", "format":"json"}', '') stats['total_capacity_gb'] = total_capacity
if ret != 0:
LOG.warning('Unable to get rados pool stats.')
else:
outbuf = json.loads(outbuf)
pool_stats = [pool for pool in outbuf['pools'] if
pool['name'] ==
self.configuration.rbd_pool][0]['stats']
stats['free_capacity_gb'] = round((float(
pool_stats['max_avail']) / units.Gi), 2)
used_capacity_gb = float(
pool_stats['bytes_used']) / units.Gi
stats['total_capacity_gb'] = round(
(stats['free_capacity_gb'] + used_capacity_gb), 2)
self._total_usage = 0 total_gbi = self._get_usage_info()
self._get_usage_info() stats['provisioned_capacity_gb'] = total_gbi
total_usage_gb = math.ceil(float(self._total_usage) / units.Gi)
stats['provisioned_capacity_gb'] = total_usage_gb
except self.rados.Error: except self.rados.Error:
# just log and return unknown capacities # just log and return unknown capacities
LOG.exception('error refreshing volume stats') LOG.exception('error refreshing volume stats')

View File

@ -0,0 +1,18 @@
---
features:
- |
RBD driver supports returning a static total capacity value instead of a
dynamic value like it's been doing. Configurable with
`report_dynamic_total_capacity` configuration option.
upgrade:
- |
RBD/Ceph backends should adjust `max_over_subscription_ratio` to take into
account that the driver is no longer reporting the volumes' physical usage
but their provisioned size.
fixes:
- |
The RBD stats report has been fixed. It now properly reports
`allocated_capacity_gb` and `provisioned_capacity_gb` as the sum of the
provisioned sizes (not the physical sizes) of the volumes created by Cinder
and of all the volumes available in the pool, respectively. Free capacity
now properly takes the pool's quota size restrictions into account.