Add ceph-pool-weight option for calculating pgs

Provide the weight option to the Ceph broker request API for requesting
the creation of a new Ceph storage pool. The weight is used to indicate
the percentage of the data that the pool is expected to consume. Each
environment may have slightly different needs based on the type of
workload, so a config option labelled ceph-pool-weight is provided to
allow the operator to tune this value.

Closes-Bug: #1492742

Change-Id: I56c7de4d9213fe85ce89cbad957291b438f6f92f
This commit is contained in:
Billy Olsen 2016-06-28 12:39:22 -07:00
parent e3e0d0d6b1
commit c837863cf2
10 changed files with 203 additions and 59 deletions

1
.gitignore vendored
View File

@ -8,3 +8,4 @@ tags
.unit-state.db .unit-state.db
tests/*.img tests/*.img
trusty trusty
.idea

View File

@ -71,7 +71,7 @@ class OpenStackAmuletDeployment(AmuletDeployment):
base_charms = { base_charms = {
'mysql': ['precise', 'trusty'], 'mysql': ['precise', 'trusty'],
'mongodb': ['precise', 'trusty'], 'mongodb': ['precise', 'trusty'],
'nrpe': ['precise', 'trusty'], 'nrpe': ['precise', 'trusty', 'wily', 'xenial'],
} }
for svc in other_services: for svc in other_services:
@ -112,7 +112,7 @@ class OpenStackAmuletDeployment(AmuletDeployment):
# Charms which should use the source config option # Charms which should use the source config option
use_source = ['mysql', 'mongodb', 'rabbitmq-server', 'ceph', use_source = ['mysql', 'mongodb', 'rabbitmq-server', 'ceph',
'ceph-osd', 'ceph-radosgw', 'ceph-mon'] 'ceph-osd', 'ceph-radosgw', 'ceph-mon', 'ceph-proxy']
# Charms which can not use openstack-origin, ie. many subordinates # Charms which can not use openstack-origin, ie. many subordinates
no_origin = ['cinder-ceph', 'hacluster', 'neutron-openvswitch', 'nrpe', no_origin = ['cinder-ceph', 'hacluster', 'neutron-openvswitch', 'nrpe',

View File

@ -57,6 +57,7 @@ from charmhelpers.core.host import (
mkdir, mkdir,
write_file, write_file,
pwgen, pwgen,
lsb_release,
) )
from charmhelpers.contrib.hahelpers.cluster import ( from charmhelpers.contrib.hahelpers.cluster import (
determine_apache_port, determine_apache_port,
@ -1195,7 +1196,10 @@ class WorkerConfigContext(OSContextGenerator):
def __call__(self): def __call__(self):
multiplier = config('worker-multiplier') or 0 multiplier = config('worker-multiplier') or 0
ctxt = {"workers": self.num_cpus * multiplier} count = int(self.num_cpus * multiplier)
if multiplier > 0 and count == 0:
count = 1
ctxt = {"workers": count}
return ctxt return ctxt
@ -1436,7 +1440,8 @@ class AppArmorContext(OSContextGenerator):
:return ctxt: Dictionary of the apparmor profile or None :return ctxt: Dictionary of the apparmor profile or None
""" """
if config('aa-profile-mode') in ['disable', 'enforce', 'complain']: if config('aa-profile-mode') in ['disable', 'enforce', 'complain']:
ctxt = {'aa_profile_mode': config('aa-profile-mode')} ctxt = {'aa_profile_mode': config('aa-profile-mode'),
'ubuntu_release': lsb_release()['DISTRIB_RELEASE']}
else: else:
ctxt = None ctxt = None
return ctxt return ctxt

View File

@ -220,7 +220,6 @@ GIT_DEFAULT_REPOS = {
} }
GIT_DEFAULT_BRANCHES = { GIT_DEFAULT_BRANCHES = {
'kilo': 'stable/kilo',
'liberty': 'stable/liberty', 'liberty': 'stable/liberty',
'mitaka': 'stable/mitaka', 'mitaka': 'stable/mitaka',
'master': 'master', 'master': 'master',
@ -413,7 +412,8 @@ def os_release(package, base='essex'):
global os_rel global os_rel
if os_rel: if os_rel:
return os_rel return os_rel
os_rel = (get_os_codename_package(package, fatal=False) or os_rel = (git_os_codename_install_source(config('openstack-origin-git')) or
get_os_codename_package(package, fatal=False) or
get_os_codename_install_source(config('openstack-origin')) or get_os_codename_install_source(config('openstack-origin')) or
base) base)
return os_rel return os_rel
@ -719,7 +719,24 @@ def git_install_requested():
return config('openstack-origin-git') is not None return config('openstack-origin-git') is not None
requirements_dir = None def git_os_codename_install_source(projects_yaml):
"""
Returns OpenStack codename of release being installed from source.
"""
if git_install_requested():
projects = _git_yaml_load(projects_yaml)
if projects in GIT_DEFAULT_BRANCHES.keys():
if projects == 'master':
return 'newton'
return projects
if 'release' in projects:
if projects['release'] == 'master':
return 'newton'
return projects['release']
return None
def git_default_repos(projects_yaml): def git_default_repos(projects_yaml):
@ -740,12 +757,6 @@ def git_default_repos(projects_yaml):
} }
repos = [repo] repos = [repo]
# NOTE(coreycb): This is a temp work-around until the requirements
# repo moves from stable/kilo branch to kilo-eol tag. The core
# repos have already done this.
if default == 'kilo':
branch = 'kilo-eol'
# neutron-* and nova-* charms require some additional repos # neutron-* and nova-* charms require some additional repos
if service in ['neutron-api', 'neutron-gateway', if service in ['neutron-api', 'neutron-gateway',
'neutron-openvswitch']: 'neutron-openvswitch']:
@ -778,7 +789,7 @@ def git_default_repos(projects_yaml):
} }
repos.append(repo) repos.append(repo)
return yaml.dump(dict(repositories=repos)) return yaml.dump(dict(repositories=repos, release=default))
return projects_yaml return projects_yaml
@ -793,6 +804,9 @@ def _git_yaml_load(projects_yaml):
return yaml.load(projects_yaml) return yaml.load(projects_yaml)
requirements_dir = None
def git_clone_and_install(projects_yaml, core_project): def git_clone_and_install(projects_yaml, core_project):
""" """
Clone/install all specified OpenStack repositories. Clone/install all specified OpenStack repositories.
@ -856,6 +870,10 @@ def git_clone_and_install(projects_yaml, core_project):
# upper-constraints didn't exist until after icehouse # upper-constraints didn't exist until after icehouse
if not os.path.isfile(constraints): if not os.path.isfile(constraints):
constraints = None constraints = None
# use constraints unless project yaml sets use_constraints to false
if 'use_constraints' in projects.keys():
if not projects['use_constraints']:
constraints = None
else: else:
repo_dir = _git_clone_and_install_single(repo, branch, depth, repo_dir = _git_clone_and_install_single(repo, branch, depth,
parent_dir, http_proxy, parent_dir, http_proxy,
@ -882,6 +900,8 @@ def _git_validate_projects_yaml(projects, core_project):
if projects['repositories'][-1]['name'] != core_project: if projects['repositories'][-1]['name'] != core_project:
error_out('{} git repo must be specified last'.format(core_project)) error_out('{} git repo must be specified last'.format(core_project))
_git_ensure_key_exists('release', projects)
def _git_ensure_key_exists(key, keys): def _git_ensure_key_exists(key, keys):
""" """

View File

@ -21,9 +21,10 @@
# James Page <james.page@ubuntu.com> # James Page <james.page@ubuntu.com>
# Adam Gandelman <adamg@ubuntu.com> # Adam Gandelman <adamg@ubuntu.com>
# #
import bisect
import errno import errno
import hashlib import hashlib
import math
import six import six
import os import os
@ -76,8 +77,16 @@ log to syslog = {use_syslog}
err to syslog = {use_syslog} err to syslog = {use_syslog}
clog to syslog = {use_syslog} clog to syslog = {use_syslog}
""" """
# For 50 < osds < 240,000 OSDs (Roughly 1 Exabyte at 6T OSDs)
powers_of_two = [8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608] # The number of placement groups per OSD to target for placement group
# calculations. This number is chosen as 100 due to the ceph PG Calc
# documentation recommending to choose 100 for clusters which are not
# expected to increase in the foreseeable future. Since the majority of the
# calculations are done on deployment, target the case of non-expanding
# clusters as the default.
DEFAULT_PGS_PER_OSD_TARGET = 100
DEFAULT_POOL_WEIGHT = 10.0
LEGACY_PG_COUNT = 200
def validator(value, valid_type, valid_range=None): def validator(value, valid_type, valid_range=None):
@ -184,42 +193,106 @@ class Pool(object):
check_call(['ceph', '--id', self.service, 'osd', 'tier', 'remove-overlay', self.name]) check_call(['ceph', '--id', self.service, 'osd', 'tier', 'remove-overlay', self.name])
check_call(['ceph', '--id', self.service, 'osd', 'tier', 'remove', self.name, cache_pool]) check_call(['ceph', '--id', self.service, 'osd', 'tier', 'remove', self.name, cache_pool])
def get_pgs(self, pool_size): def get_pgs(self, pool_size, percent_data=DEFAULT_POOL_WEIGHT):
""" """Return the number of placement groups to use when creating the pool.
:param pool_size: int. pool_size is either the number of replicas for replicated pools or the K+M sum for
erasure coded pools Returns the number of placement groups which should be specified when
creating the pool. This is based upon the calculation guidelines
provided by the Ceph Placement Group Calculator (located online at
http://ceph.com/pgcalc/).
The number of placement groups is calculated using the following:
(Target PGs per OSD) * (OSD #) * (%Data)
----------------------------------------
(Pool size)
Per the upstream guidelines, the OSD # should really be considered
based on the number of OSDs which are eligible to be selected by the
pool. Since the pool creation doesn't specify any of the CRUSH set rules,
the default rule will be dependent upon the type of pool being
created (replicated or erasure).
This code makes no attempt to determine the number of OSDs which can be
selected for the specific rule, rather it is left to the user to tune
in the form of the 'expected-osd-count' config option.
:param pool_size: int. pool_size is either the number of replicas for
replicated pools or the K+M sum for erasure coded pools
:param percent_data: float. the percentage of data that is expected to
be contained in the pool for the specific OSD set. Default value
is to assume 10% of the data is for this pool, which is a
relatively low % of the data but allows for the pg_num to be
increased. NOTE: the default is primarily to handle the scenario
where related charms requiring pools have not been upgraded to
include an update to indicate their relative usage of the pools.
:return: int. The number of pgs to use. :return: int. The number of pgs to use.
""" """
# Note: This calculation follows the approach that is provided
# by the Ceph PG Calculator located at http://ceph.com/pgcalc/.
validator(value=pool_size, valid_type=int) validator(value=pool_size, valid_type=int)
# Ensure that percent data is set to something - even with a default
# it can be set to None, which would wreak havoc below.
if percent_data is None:
percent_data = DEFAULT_POOL_WEIGHT
# If the expected-osd-count is specified, then use the max between
# the expected-osd-count and the actual osd_count
osd_list = get_osds(self.service) osd_list = get_osds(self.service)
if not osd_list: expected = config('expected-osd-count') or 0
if osd_list:
osd_count = max(expected, len(osd_list))
# Log a message to provide some insight if the calculations appear
# to be off because someone is setting the expected count and
# there are more OSDs in reality. Try to make a proper guess
# based upon the cluster itself.
if expected and osd_count != expected:
log("Found more OSDs than provided expected count. "
"Using the actual count instead", INFO)
elif expected:
# Use the expected-osd-count in older ceph versions to allow for
# more accurate pg calculations
osd_count = expected
else:
# NOTE(james-page): Default to 200 for older ceph versions # NOTE(james-page): Default to 200 for older ceph versions
# which don't support OSD query from cli # which don't support OSD query from cli
return 200 return LEGACY_PG_COUNT
osd_list_length = len(osd_list) percent_data /= 100.0
# Calculate based on Ceph best practices target_pgs_per_osd = config('pgs-per-osd') or DEFAULT_PGS_PER_OSD_TARGET
if osd_list_length < 5: num_pg = (target_pgs_per_osd * osd_count * percent_data) // pool_size
return 128
elif 5 < osd_list_length < 10: # The CRUSH algorithm has a slight optimization for placement groups
return 512 # with powers of 2 so find the nearest power of 2. If the nearest
elif 10 < osd_list_length < 50: # power of 2 is more than 25% below the original value, the next
return 4096 # highest value is used. To do this, find the nearest power of 2 such
# that 2^n <= num_pg, then check whether it is within the 25% tolerance.
exponent = math.floor(math.log(num_pg, 2))
nearest = 2 ** exponent
if (num_pg - nearest) > (num_pg * 0.25):
# Choose the next highest power of 2 since the nearest is more
# than 25% below the original value.
return int(nearest * 2)
else: else:
estimate = (osd_list_length * 100) / pool_size return int(nearest)
# Return the next nearest power of 2
index = bisect.bisect_right(powers_of_two, estimate)
return powers_of_two[index]
class ReplicatedPool(Pool): class ReplicatedPool(Pool):
def __init__(self, service, name, pg_num=None, replicas=2): def __init__(self, service, name, pg_num=None, replicas=2,
percent_data=10.0):
super(ReplicatedPool, self).__init__(service=service, name=name) super(ReplicatedPool, self).__init__(service=service, name=name)
self.replicas = replicas self.replicas = replicas
if pg_num is None: if pg_num:
self.pg_num = self.get_pgs(self.replicas) # Since the number of placement groups were specified, ensure
# that there aren't too many created.
max_pgs = self.get_pgs(self.replicas, 100.0)
self.pg_num = min(pg_num, max_pgs)
else: else:
self.pg_num = pg_num self.pg_num = self.get_pgs(self.replicas, percent_data)
def create(self): def create(self):
if not pool_exists(self.service, self.name): if not pool_exists(self.service, self.name):
@ -238,30 +311,39 @@ class ReplicatedPool(Pool):
# Default jerasure erasure coded pool # Default jerasure erasure coded pool
class ErasurePool(Pool): class ErasurePool(Pool):
def __init__(self, service, name, erasure_code_profile="default"): def __init__(self, service, name, erasure_code_profile="default",
percent_data=10.0):
super(ErasurePool, self).__init__(service=service, name=name) super(ErasurePool, self).__init__(service=service, name=name)
self.erasure_code_profile = erasure_code_profile self.erasure_code_profile = erasure_code_profile
self.percent_data = percent_data
def create(self): def create(self):
if not pool_exists(self.service, self.name): if not pool_exists(self.service, self.name):
# Try to find the erasure profile information so we can properly size the pgs # Try to find the erasure profile information in order to properly
erasure_profile = get_erasure_profile(service=self.service, name=self.erasure_code_profile) # size the number of placement groups. The size of an erasure
# coded placement group is calculated as k+m.
erasure_profile = get_erasure_profile(self.service,
self.erasure_code_profile)
# Check for errors # Check for errors
if erasure_profile is None: if erasure_profile is None:
log(message='Failed to discover erasure_profile named={}'.format(self.erasure_code_profile), msg = ("Failed to discover erasure profile named "
level=ERROR) "{}".format(self.erasure_code_profile))
raise PoolCreationError(message='unable to find erasure profile {}'.format(self.erasure_code_profile)) log(msg, level=ERROR)
raise PoolCreationError(msg)
if 'k' not in erasure_profile or 'm' not in erasure_profile: if 'k' not in erasure_profile or 'm' not in erasure_profile:
# Error # Error
log(message='Unable to find k (data chunks) or m (coding chunks) in {}'.format(erasure_profile), msg = ("Unable to find k (data chunks) or m (coding chunks) "
level=ERROR) "in erasure profile {}".format(erasure_profile))
raise PoolCreationError( log(msg, level=ERROR)
message='unable to find k (data chunks) or m (coding chunks) in {}'.format(erasure_profile)) raise PoolCreationError(msg)
pgs = self.get_pgs(int(erasure_profile['k']) + int(erasure_profile['m'])) k = int(erasure_profile['k'])
m = int(erasure_profile['m'])
pgs = self.get_pgs(k + m, self.percent_data)
# Create it # Create it
cmd = ['ceph', '--id', self.service, 'osd', 'pool', 'create', self.name, str(pgs), str(pgs), cmd = ['ceph', '--id', self.service, 'osd', 'pool', 'create',
self.name, str(pgs), str(pgs),
'erasure', self.erasure_code_profile] 'erasure', self.erasure_code_profile]
try: try:
check_call(cmd) check_call(cmd)
@ -955,16 +1037,22 @@ class CephBrokerRq(object):
self.request_id = str(uuid.uuid1()) self.request_id = str(uuid.uuid1())
self.ops = [] self.ops = []
def add_op_create_pool(self, name, replica_count=3, pg_num=None): def add_op_create_pool(self, name, replica_count=3, pg_num=None,
weight=None):
"""Adds an operation to create a pool. """Adds an operation to create a pool.
@param pg_num setting: optional setting. If not provided, this value @param pg_num setting: optional setting. If not provided, this value
will be calculated by the broker based on how many OSDs are in the will be calculated by the broker based on how many OSDs are in the
cluster at the time of creation. Note that, if provided, this value cluster at the time of creation. Note that, if provided, this value
will be capped at the current available maximum. will be capped at the current available maximum.
@param weight: the percentage of data the pool makes up
""" """
if pg_num and weight:
raise ValueError('pg_num and weight are mutually exclusive')
self.ops.append({'op': 'create-pool', 'name': name, self.ops.append({'op': 'create-pool', 'name': name,
'replicas': replica_count, 'pg_num': pg_num}) 'replicas': replica_count, 'pg_num': pg_num,
'weight': weight})
def set_ops(self, ops): def set_ops(self, ops):
"""Set request ops to provided value. """Set request ops to provided value.
@ -982,7 +1070,7 @@ class CephBrokerRq(object):
def _ops_equal(self, other): def _ops_equal(self, other):
if len(self.ops) == len(other.ops): if len(self.ops) == len(other.ops):
for req_no in range(0, len(self.ops)): for req_no in range(0, len(self.ops)):
for key in ['replicas', 'name', 'op', 'pg_num']: for key in ['replicas', 'name', 'op', 'pg_num', 'weight']:
if self.ops[req_no].get(key) != other.ops[req_no].get(key): if self.ops[req_no].get(key) != other.ops[req_no].get(key):
return False return False
else: else:

View File

@ -174,7 +174,7 @@ def init_is_systemd():
def adduser(username, password=None, shell='/bin/bash', system_user=False, def adduser(username, password=None, shell='/bin/bash', system_user=False,
primary_group=None, secondary_groups=None, uid=None): primary_group=None, secondary_groups=None, uid=None, home_dir=None):
"""Add a user to the system. """Add a user to the system.
Will log but otherwise succeed if the user already exists. Will log but otherwise succeed if the user already exists.
@ -186,6 +186,7 @@ def adduser(username, password=None, shell='/bin/bash', system_user=False,
:param str primary_group: Primary group for user; defaults to username :param str primary_group: Primary group for user; defaults to username
:param list secondary_groups: Optional list of additional groups :param list secondary_groups: Optional list of additional groups
:param int uid: UID for user being created :param int uid: UID for user being created
:param str home_dir: Home directory for user
:returns: The password database entry struct, as returned by `pwd.getpwnam` :returns: The password database entry struct, as returned by `pwd.getpwnam`
""" """
@ -200,6 +201,8 @@ def adduser(username, password=None, shell='/bin/bash', system_user=False,
cmd = ['useradd'] cmd = ['useradd']
if uid: if uid:
cmd.extend(['--uid', str(uid)]) cmd.extend(['--uid', str(uid)])
if home_dir:
cmd.extend(['--home', str(home_dir)])
if system_user or password is None: if system_user or password is None:
cmd.append('--system') cmd.append('--system')
else: else:

View File

@ -110,6 +110,21 @@ options:
rbd pool has been created, changing this value will not have any rbd pool has been created, changing this value will not have any
effect (although it can be changed in ceph by manually configuring effect (although it can be changed in ceph by manually configuring
your ceph cluster). your ceph cluster).
ceph-pool-weight:
type: int
default: 5
description: |
Defines a relative weighting of the pool as a percentage of the total
amount of data in the Ceph cluster. This effectively weights the number
of placement groups for the pool created to be appropriately portioned
to the amount of data expected. For example, if the compute images
for the OpenStack compute instances are expected to take up 20% of the
overall configuration then this value would be specified as 20. Note -
it is important to choose an appropriate value for the pool weight as
this directly affects the number of placement groups which will be
created for the pool. The number of placement groups for a pool can
only be increased, never decreased - so it is important to identify the
percent of data that will likely reside in the pool.
# HA configuration settings # HA configuration settings
dns-ha: dns-ha:
type: boolean type: boolean

View File

@ -300,7 +300,9 @@ def get_ceph_request():
service = service_name() service = service_name()
rq = CephBrokerRq() rq = CephBrokerRq()
replicas = config('ceph-osd-replication-count') replicas = config('ceph-osd-replication-count')
rq.add_op_create_pool(name=service, replica_count=replicas) weight = config('ceph-pool-weight')
rq.add_op_create_pool(name=service, replica_count=replicas,
weight=weight)
return rq return rq

View File

@ -71,7 +71,7 @@ class OpenStackAmuletDeployment(AmuletDeployment):
base_charms = { base_charms = {
'mysql': ['precise', 'trusty'], 'mysql': ['precise', 'trusty'],
'mongodb': ['precise', 'trusty'], 'mongodb': ['precise', 'trusty'],
'nrpe': ['precise', 'trusty'], 'nrpe': ['precise', 'trusty', 'wily', 'xenial'],
} }
for svc in other_services: for svc in other_services:
@ -112,7 +112,7 @@ class OpenStackAmuletDeployment(AmuletDeployment):
# Charms which should use the source config option # Charms which should use the source config option
use_source = ['mysql', 'mongodb', 'rabbitmq-server', 'ceph', use_source = ['mysql', 'mongodb', 'rabbitmq-server', 'ceph',
'ceph-osd', 'ceph-radosgw', 'ceph-mon'] 'ceph-osd', 'ceph-radosgw', 'ceph-mon', 'ceph-proxy']
# Charms which can not use openstack-origin, ie. many subordinates # Charms which can not use openstack-origin, ie. many subordinates
no_origin = ['cinder-ceph', 'hacluster', 'neutron-openvswitch', 'nrpe', no_origin = ['cinder-ceph', 'hacluster', 'neutron-openvswitch', 'nrpe',

View File

@ -26,7 +26,7 @@ sys.modules['apt'] = mock_apt
mock_apt.apt_pkg = MagicMock() mock_apt.apt_pkg = MagicMock()
os.environ['JUJU_UNIT_NAME'] = 'glance' os.environ['JUJU_UNIT_NAME'] = 'glance'
import hooks.glance_utils as utils import hooks.glance_utils as utils # noqa
_reg = utils.register_configs _reg = utils.register_configs
_map = utils.restart_map _map = utils.restart_map
@ -465,6 +465,16 @@ class GlanceRelationTests(CharmTestCase):
for c in [call('/etc/glance/glance.conf')]: for c in [call('/etc/glance/glance.conf')]:
self.assertNotIn(c, configs.write.call_args_list) self.assertNotIn(c, configs.write.call_args_list)
@patch('hooks.charmhelpers.contrib.storage.linux.ceph.CephBrokerRq'
'.add_op_create_pool')
def test_create_pool_op(self, mock_broker):
self.service_name.return_value = 'glance'
self.test_config.set('ceph-osd-replication-count', 3)
self.test_config.set('ceph-pool-weight', 6)
relations.get_ceph_request()
mock_broker.assert_called_with(name='glance', replica_count=3,
weight=6)
@patch.object(relations, 'get_ceph_request') @patch.object(relations, 'get_ceph_request')
@patch.object(relations, 'send_request_if_needed') @patch.object(relations, 'send_request_if_needed')
@patch.object(relations, 'is_request_complete') @patch.object(relations, 'is_request_complete')