From 8469109016bcfd5806e230202e1996a8ba649535 Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Thu, 3 Aug 2017 18:50:12 +0200 Subject: [PATCH] RBD: Fix stats reporting Current RBD code is incorrectly reporting the stats of the pool in the following ways: - `provisioned_capacity_gb` contains physical space used by cinder created volumes. - `free_capacity_gb` is not taking into account that pools can have quota restrictions and they should be used as the reference for the free capacity. - `total_capacity` dynamically changes, which means that there is no way to have a fixed over provisioning capacity. This patch fixes the stats reporting making sure we return the right values in `provisioned_capacity_gb` and `free_capacity_gb`, and allows us to use a static calculation of the `total_capacity` using `report_dynamic_total_capacity` configuration option. We don't report `allocated_capacity_gb` because this is something that is calculated by the Cinder core code and should not be reported by drivers, even if it's not currently working as expected [1][2]. [1] https://bugs.launchpad.net/cinder/+bug/1712549 [2] https://bugs.launchpad.net/cinder/+bug/1706057 Change-Id: I1e82bf9d0b6cc0fb1d1fc2dd8b8ccc59aea3f73f Closes-Bug: #1706060 --- cinder/tests/unit/volume/drivers/test_rbd.py | 154 ++++++++++++------ cinder/volume/drivers/rbd.py | 107 ++++++++---- .../rbd-stats-report-0c7e803bb0b1aedb.yaml | 18 ++ 3 files changed, 196 insertions(+), 83 deletions(-) create mode 100644 releasenotes/notes/rbd-stats-report-0c7e803bb0b1aedb.yaml diff --git a/cinder/tests/unit/volume/drivers/test_rbd.py b/cinder/tests/unit/volume/drivers/test_rbd.py index 3ad4c59c096..1bee2c24fef 100644 --- a/cinder/tests/unit/volume/drivers/test_rbd.py +++ b/cinder/tests/unit/volume/drivers/test_rbd.py @@ -1121,19 +1121,13 @@ class RBDTestCase(test.TestCase): @ddt.data(True, False) @common_mocks - def test_update_volume_stats(self, replication_enabled): - client = self.mock_client.return_value - client.__enter__.return_value = client - - client.cluster = mock.Mock() - client.cluster.mon_command = mock.Mock() - client.cluster.mon_command.return_value = ( - 0, '{"stats":{"total_bytes":64385286144,' - '"total_used_bytes":3289628672,"total_avail_bytes":61095657472},' - '"pools":[{"name":"rbd","id":2,"stats":{"kb_used":1510197,' - '"bytes_used":1546440971,"max_avail":28987613184,"objects":412}},' - '{"name":"volumes","id":3,"stats":{"kb_used":0,"bytes_used":0,' - '"max_avail":28987613184,"objects":0}}]}\n', '') + @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info') + @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats') + def test_update_volume_stats(self, replication_enabled, stats_mock, + usage_mock): + stats_mock.return_value = (mock.sentinel.free_capacity_gb, + mock.sentinel.total_capacity_gb) + usage_mock.return_value = mock.sentinel.provisioned_capacity_gb expected = dict( volume_backend_name='RBD', @@ -1141,11 +1135,11 @@ class RBDTestCase(test.TestCase): vendor_name='Open Source', driver_version=self.driver.VERSION, storage_protocol='ceph', - total_capacity_gb=28.44, - free_capacity_gb=27.0, + total_capacity_gb=mock.sentinel.total_capacity_gb, + free_capacity_gb=mock.sentinel.free_capacity_gb, reserved_percentage=0, thin_provisioning_support=True, - provisioned_capacity_gb=0.0, + provisioned_capacity_gb=mock.sentinel.provisioned_capacity_gb, max_over_subscription_ratio=1.0, multiattach=False) @@ -1162,19 +1156,12 @@ class RBDTestCase(test.TestCase): mock_driver_configuration) actual = self.driver.get_volume_stats(True) - client.cluster.mon_command.assert_called_once_with( - '{"prefix":"df", "format":"json"}', '') self.assertDictEqual(expected, actual) @common_mocks - def test_update_volume_stats_error(self): - client = self.mock_client.return_value - client.__enter__.return_value = client - - client.cluster = mock.Mock() - client.cluster.mon_command = mock.Mock() - client.cluster.mon_command.return_value = (22, '', '') - + @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info') + @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats') + def test_update_volume_stats_error(self, stats_mock, usage_mock): self.mock_object(self.driver.configuration, 'safe_get', mock_driver_configuration) @@ -1187,15 +1174,66 @@ class RBDTestCase(test.TestCase): free_capacity_gb='unknown', reserved_percentage=0, multiattach=False, - provisioned_capacity_gb=0.0, + provisioned_capacity_gb=0, max_over_subscription_ratio=1.0, thin_provisioning_support=True) actual = self.driver.get_volume_stats(True) - client.cluster.mon_command.assert_called_once_with( - '{"prefix":"df", "format":"json"}', '') self.assertDictEqual(expected, actual) + @ddt.data( + # Normal case, no quota and dynamic total + {'free_capacity': 27.0, 'total_capacity': 28.44}, + # No quota and static total + {'dynamic_total': False, + 'free_capacity': 27.0, 'total_capacity': 59.96}, + # Quota and dynamic total + {'quota_max_bytes': 3221225472, 'max_avail': 1073741824, + 'free_capacity': 1, 'total_capacity': 2.44}, + # Quota and static total + {'quota_max_bytes': 3221225472, 'max_avail': 1073741824, + 'dynamic_total': False, + 'free_capacity': 1, 'total_capacity': 3.00}, + # Quota and dynamic total when free would be negative + {'quota_max_bytes': 1073741824, + 'free_capacity': 0, 'total_capacity': 1.44}, + ) + @ddt.unpack + @common_mocks + def test_get_pool(self, free_capacity, total_capacity, + max_avail=28987613184, quota_max_bytes=0, + dynamic_total=True): + client = self.mock_client.return_value + client.__enter__.return_value = client + client.cluster.mon_command.side_effect = [ + (0, '{"stats":{"total_bytes":64385286144,' + '"total_used_bytes":3289628672,"total_avail_bytes":61095657472},' + '"pools":[{"name":"rbd","id":2,"stats":{"kb_used":1510197,' + '"bytes_used":1546440971,"max_avail":%s,"objects":412}},' + '{"name":"volumes","id":3,"stats":{"kb_used":0,"bytes_used":0,' + '"max_avail":28987613184,"objects":0}}]}\n' % max_avail, ''), + (0, '{"pool_name":"volumes","pool_id":4,"quota_max_objects":0,' + '"quota_max_bytes":%s}\n' % quota_max_bytes, ''), + ] + with mock.patch.object(self.driver.configuration, 'safe_get', + return_value=dynamic_total): + result = self.driver._get_pool_stats() + client.cluster.mon_command.assert_has_calls([ + mock.call('{"prefix":"df", "format":"json"}', ''), + mock.call('{"prefix":"osd pool get-quota", "pool": "rbd",' + ' "format":"json"}', ''), + ]) + self.assertEqual((free_capacity, total_capacity), result) + + @common_mocks + def test_get_pool_stats_failure(self): + client = self.mock_client.return_value + client.__enter__.return_value = client + client.cluster.mon_command.return_value = (-1, '', '') + + result = self.driver._get_pool_stats() + self.assertEqual(('unknown', 'unknown'), result) + @common_mocks def test_get_mon_addrs(self): with mock.patch.object(self.driver, '_execute') as mock_execute: @@ -1788,32 +1826,42 @@ class RBDTestCase(test.TestCase): self.assertEqual(RAISED_EXCEPTIONS, [self.mock_rbd.ImageExists]) - @ddt.data({'image_size': [1, 1], 'total_usage': 2}, - {'image_size': MockImageNotFoundException, 'total_usage': 0}) - @ddt.unpack - @mock.patch.object(driver, 'RADOSClient') - @mock.patch.object(driver, 'RBDVolumeProxy') - def test__get_usage_info(self, volume_proxy, mock_rados_client, - image_size, total_usage): - class FakeRBDProxy(object): - def list(self, ioctx): - return ['volume-1', 'volume-2'] + @mock.patch('cinder.volume.drivers.rbd.RBDVolumeProxy') + @mock.patch('cinder.volume.drivers.rbd.RADOSClient') + @mock.patch('cinder.volume.drivers.rbd.RBDDriver.RBDProxy') + def test__get_usage_info(self, rbdproxy_mock, client_mock, volproxy_mock): + def FakeVolProxy(size): + if size == -1: + size_mock = mock.Mock(side_effect=MockImageNotFoundException) + else: + size_mock = mock.Mock(return_value=size * units.Gi) + return mock.Mock(return_value=mock.Mock(size=size_mock)) - def diff_iterate(offset, length, from_snapshot, iterate_cb): - self.driver._iterate_cb(offset, length, True) + volumes = ['volume-1', 'non-existent', 'non-cinder-volume'] - self.driver._total_usage = 0 - with mock.patch.object(self.driver, 'RBDProxy') as rbd_proxy: - with mock.patch.object(self.driver, 'rbd') as mock_rbd: - mock_rbd.ImageNotFound = MockImageNotFoundException - proxy_list = mock.Mock() - proxy_list.side_effect = ['volume-1', 'volume-2'] - rbd_proxy.return_value = FakeRBDProxy() - image = volume_proxy.return_value.__enter__.return_value - image.size.side_effect = image_size - image.diff_iterate.side_effect = diff_iterate - self.driver._get_usage_info() - self.assertEqual(total_usage, self.driver._total_usage) + client = client_mock.return_value.__enter__.return_value + rbdproxy_mock.return_value.list.return_value = volumes + + volproxy_mock.side_effect = [ + mock.Mock(**{'__enter__': FakeVolProxy(1.0), + '__exit__': mock.Mock()}), + mock.Mock(**{'__enter__': FakeVolProxy(-1), + '__exit__': mock.Mock()}), + mock.Mock(**{'__enter__': FakeVolProxy(2.0), + '__exit__': mock.Mock()}) + ] + + with mock.patch.object(self.driver, 'rbd') as mock_rbd: + mock_rbd.ImageNotFound = MockImageNotFoundException + total_provision = self.driver._get_usage_info() + + rbdproxy_mock.return_value.list.assert_called_once_with(client.ioctx) + volproxy_mock.assert_has_calls([ + mock.call(self.driver, volumes[0], read_only=True), + mock.call(self.driver, volumes[1], read_only=True), + ]) + + self.assertEqual(3.00, total_provision) class ManagedRBDTestCase(test_driver.BaseDriverTestCase): diff --git a/cinder/volume/drivers/rbd.py b/cinder/volume/drivers/rbd.py index ee1834c6b33..e6a27831655 100644 --- a/cinder/volume/drivers/rbd.py +++ b/cinder/volume/drivers/rbd.py @@ -92,6 +92,11 @@ RBD_OPTS = [ 'ceph cluster to do a demotion/promotion of volumes. ' 'If value < 0, no timeout is set and default librados ' 'value is used.'), + cfg.BoolOpt('report_dynamic_total_capacity', default=True, + help='Set to True for driver to report total capacity as a ' + 'dynamic value -used + current free- and to False to ' + 'report a static value -quota max bytes if defined and ' + 'global size of cluster if not-.'), ] CONF = cfg.CONF @@ -360,22 +365,79 @@ class RBDDriver(driver.CloneableImageVD, ports.append(port) return hosts, ports - def _iterate_cb(self, offset, length, exists): - if exists: - self._total_usage += length - def _get_usage_info(self): + """Calculate provisioned volume space in GiB. + + Stats report should send provisioned size of volumes (snapshot must not + be included) and not the physical size of those volumes. + + We must include all volumes, not only Cinder created volumes, because + Cinder created volumes are reported by the Cinder core code as + allocated_capacity_gb. + """ + total_provisioned = 0 with RADOSClient(self) as client: for t in self.RBDProxy().list(client.ioctx): - if t.startswith('volume'): - # Only check for "volume" to allow some flexibility with - # non-default volume_name_template settings. Template - # must start with "volume". + with RBDVolumeProxy(self, t, read_only=True) as v: try: - with RBDVolumeProxy(self, t, read_only=True) as v: - v.diff_iterate(0, v.size(), None, self._iterate_cb) + size = v.size() except self.rbd.ImageNotFound: LOG.debug("Image %s is not found.", t) + else: + total_provisioned += size + + total_provisioned = math.ceil(float(total_provisioned) / units.Gi) + return total_provisioned + + def _get_pool_stats(self): + """Gets pool free and total capacity in GiB. + + Calculate free and total capacity of the pool based on the pool's + defined quota and pools stats. + + Returns a tuple with (free, total) where they are either unknown or a + real number with a 2 digit precision. + """ + pool_name = self.configuration.rbd_pool + + with RADOSClient(self) as client: + ret, df_outbuf, __ = client.cluster.mon_command( + '{"prefix":"df", "format":"json"}', '') + if ret: + LOG.warning('Unable to get rados pool stats.') + return 'unknown', 'unknown' + + ret, quota_outbuf, __ = client.cluster.mon_command( + '{"prefix":"osd pool get-quota", "pool": "%s",' + ' "format":"json"}' % pool_name, '') + if ret: + LOG.warning('Unable to get rados pool quotas.') + return 'unknown', 'unknown' + + df_data = json.loads(df_outbuf) + pool_stats = [pool for pool in df_data['pools'] + if pool['name'] == pool_name][0]['stats'] + + bytes_quota = json.loads(quota_outbuf)['quota_max_bytes'] + # With quota the total is the quota limit and free is quota - used + if bytes_quota: + total_capacity = bytes_quota + free_capacity = max(min(total_capacity - pool_stats['bytes_used'], + pool_stats['max_avail']), + 0) + # Without quota free is pools max available and total is global size + else: + total_capacity = df_data['stats']['total_bytes'] + free_capacity = pool_stats['max_avail'] + + # If we want dynamic total capacity (default behavior) + if self.configuration.safe_get('report_dynamic_total_capacity'): + total_capacity = free_capacity + pool_stats['bytes_used'] + + free_capacity = round((float(free_capacity) / units.Gi), 2) + total_capacity = round((float(total_capacity) / units.Gi), 2) + + return free_capacity, total_capacity def _update_volume_stats(self): stats = { @@ -401,27 +463,12 @@ class RBDDriver(driver.CloneableImageVD, stats['replication_targets'] = self._target_names try: - with RADOSClient(self) as client: - ret, outbuf, _outs = client.cluster.mon_command( - '{"prefix":"df", "format":"json"}', '') - if ret != 0: - LOG.warning('Unable to get rados pool stats.') - else: - outbuf = json.loads(outbuf) - pool_stats = [pool for pool in outbuf['pools'] if - pool['name'] == - self.configuration.rbd_pool][0]['stats'] - stats['free_capacity_gb'] = round((float( - pool_stats['max_avail']) / units.Gi), 2) - used_capacity_gb = float( - pool_stats['bytes_used']) / units.Gi - stats['total_capacity_gb'] = round( - (stats['free_capacity_gb'] + used_capacity_gb), 2) + free_capacity, total_capacity = self._get_pool_stats() + stats['free_capacity_gb'] = free_capacity + stats['total_capacity_gb'] = total_capacity - self._total_usage = 0 - self._get_usage_info() - total_usage_gb = math.ceil(float(self._total_usage) / units.Gi) - stats['provisioned_capacity_gb'] = total_usage_gb + total_gbi = self._get_usage_info() + stats['provisioned_capacity_gb'] = total_gbi except self.rados.Error: # just log and return unknown capacities LOG.exception('error refreshing volume stats') diff --git a/releasenotes/notes/rbd-stats-report-0c7e803bb0b1aedb.yaml b/releasenotes/notes/rbd-stats-report-0c7e803bb0b1aedb.yaml new file mode 100644 index 00000000000..9728faaecda --- /dev/null +++ b/releasenotes/notes/rbd-stats-report-0c7e803bb0b1aedb.yaml @@ -0,0 +1,18 @@ +--- +features: + - | + RBD driver supports returning a static total capacity value instead of a + dynamic value like it's been doing. Configurable with + `report_dynamic_total_capacity` configuration option. +upgrade: + - | + RBD/Ceph backends should adjust `max_over_subscription_ratio` to take into + account that the driver is no longer reporting volume's physical usage but + it's provisioned size. +fixes: + - | + RBD stats report has been fixed, now properly reports + `allocated_capacity_gb` and `provisioned_capacity_gb` with the sum of the + sizes of the volumes (not physical sizes) for volumes created by Cinder and + all available in the pool respectively. Free capacity will now properly + handle quota size restrictions of the pool.