Enable Ceph charts to be rack aware for CRUSH
Add support for a rack-level CRUSH map. Rack-level CRUSH support is enabled by setting the "rack_replicated_rule" CRUSH rule.

Change-Id: I4df224f2821872faa2eddec2120832e9a22f4a7c
parent 5d356f9265
commit 5ce9f2eb3b
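A minimal sketch of how a deployer might enable the new rule once this change lands. The values paths (conf.pool.default.crush_rule, conf.pool.default.rack_regex) are taken from the diff below; the release name, chart path, and namespace are placeholders, not part of this commit:

    # Hypothetical Helm override: select the rack-aware CRUSH rule and keep the
    # default of using the first 8 characters of the hostname as the rack name.
    helm upgrade --install ceph-osd ./ceph-osd \
      --namespace ceph \
      --set conf.pool.default.crush_rule=rack_replicated_rule \
      --set conf.pool.default.rack_regex="1-8"

The same override would need to be applied to any other chart that consumes these values (for example the chart carrying the pool management job below).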
@@ -37,6 +37,10 @@ if ! ceph --cluster "${CLUSTER}" osd crush rule ls | grep -q "^same_host$"; then
   ceph --cluster "${CLUSTER}" osd crush rule create-simple same_host default osd
 fi
 
+if ! ceph --cluster "${CLUSTER}" osd crush rule ls | grep -q "^rack_replicated_rule$"; then
+  ceph --cluster "${CLUSTER}" osd crush rule create-simple rack_replicated_rule default rack
+fi
+
 function reweight_osds () {
   for OSD_ID in $(ceph --cluster "${CLUSTER}" osd df | awk '$3 == "0" {print $1}'); do
     OSD_WEIGHT=$(ceph --cluster "${CLUSTER}" osd df --format json-pretty| grep -A7 "\bosd.${OSD_ID}\b" | awk '/"kb"/{ gsub(",",""); d= $2/1073741824 ; r = sprintf("%.2f", d); print r }');
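The init script above only creates the rule; whether it exists and what it selects can be checked with the same CLI calls the script uses, assuming the cluster name "ceph" used throughout these scripts:

    # Confirm the rack-aware rule was created.
    ceph --cluster ceph osd crush rule ls | grep -q "^rack_replicated_rule$" && echo present
    # Dump the rule; it should choose leaves beneath buckets of type "rack".
    ceph --cluster ceph osd crush rule dump rack_replicated_rule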
@@ -128,9 +128,13 @@ conf:
       pg_per_osd: 100
       protected: true
     default:
-      #NOTE(portdirect): this should be 'same_host' for a single node
-      # cluster to be in a healthy state
+      # NOTE(supamatt): Accepted values are:
+      #  same_host for a single node
+      #  replicated_rule for a multi node
+      #  rack_replicated_rule for a multi node in multiple (>=3) racks
+      # Ceph cluster must be in a healthy state.
       crush_rule: replicated_rule
   #NOTE(portdirect): this section describes the pools that will be managed by
   # the ceph pool management job, as it tunes the pgs and crush rule, based on
   # the above.
@@ -126,13 +126,26 @@ OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
 OSD_KEYRING="${OSD_PATH}/keyring"
 # NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
 OSD_WEIGHT=0
-ceph \
-  --cluster "${CLUSTER}" \
-  --name="osd.${OSD_ID}" \
-  --keyring="${OSD_KEYRING}" \
-  osd \
-  crush \
-  create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${CRUSH_LOCATION}
+if [ "x${CRUSH_RULE}" == "xrack_replicated_rule" ]; then
+  RACK_LOCATION=$(echo rack_$(echo ${HOSTNAME} | cut -c ${RACK_REGEX}))
+  CRUSH_LOCATION=$(echo "root=default rack=${RACK_LOCATION} host=${HOSTNAME}")
+  ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${CRUSH_LOCATION} || true
+  RACK_LOCATION_CHECK=$(ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | awk -F'"' '/rack/{print $4}')
+  if [ "x${RACK_LOCATION_CHECK}" != "x${RACK_LOCATION}" ]; then
+    # NOTE(supamatt): Manually move the buckets for previously configured CRUSH configurations
+    # as create-or-move may not appropriately move them.
+    ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+      osd crush add-bucket ${RACK_LOCATION} rack || true
+    ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+      osd crush move ${RACK_LOCATION} root=default || true
+    ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+      osd crush move ${HOSTNAME} rack=${RACK_LOCATION} || true
+  fi
+else
+  ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${CRUSH_LOCATION} || true
+fi
 
 if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
   if [ -n "${OSD_JOURNAL}" ]; then
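To make the rack-name derivation in the branch above concrete, here is what it evaluates to for a made-up hostname, with RACK_REGEX at the chart default of "1-8" (see the values addition further down):

    # Worked example of the derivation above; the hostname is hypothetical.
    HOSTNAME=rack01-node03.example.com
    RACK_REGEX="1-8"
    RACK_LOCATION="rack_$(echo ${HOSTNAME} | cut -c ${RACK_REGEX})"
    CRUSH_LOCATION="root=default rack=${RACK_LOCATION} host=${HOSTNAME}"
    echo "${CRUSH_LOCATION}"
    # prints: root=default rack=rack_rack01-n host=rack01-node03.example.com

So every host whose name shares the same first eight characters lands in the same rack bucket, which is why the regex range should be chosen to match the site's hostname convention.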
@@ -179,6 +179,10 @@ spec:
               value: "ceph"
             - name: CEPH_GET_ADMIN_KEY
               value: "1"
+            - name: CRUSH_RULE
+              value: {{ .Values.conf.pool.default.crush_rule }}
+            - name: RACK_REGEX
+              value: {{ .Values.conf.pool.default.rack_regex }}
           command:
             - /tmp/osd-start.sh
           lifecycle:
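One way to sanity-check that the rendered values actually reach the OSD containers; the namespace and pod name are placeholders, and this assumes the image ships a standard env binary:

    # Hypothetical check from the deployment host; substitute a real OSD pod name.
    kubectl -n ceph exec ceph-osd-example-pod -- env | grep -E '^(CRUSH_RULE|RACK_REGEX)='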
@@ -107,6 +107,18 @@ conf:
     osd_mount_options_xfs: "rw,noatime,largeio,inode64,swalloc,logbufs=8,logbsize=256k,allocsize=4M"
     osd_journal_size: 10240
 
+  pool:
+    default:
+      # NOTE(supamatt): Accepted values are:
+      #  same_host for a single node
+      #  replicated_rule for a multi node
+      #  rack_replicated_rule for a multi node in multiple (>=3) racks
+      # Ceph cluster must be in a healthy state.
+      crush_rule: replicated_rule
+      # NOTE(supamatt): By default use the first 8 characters of the hostname to
+      # define the rack type bucket names for CRUSH.
+      rack_regex: "1-8"
+
   storage:
     # NOTE(portdirect): for homogeneous clusters the `osd` key can be used to
     # define OSD pods that will be deployed across the cluster.
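After the OSDs restart with rack_replicated_rule selected, the resulting hierarchy can be inspected from any client node; the root/rack/host levels mirror the CRUSH_LOCATION built in the start script (cluster name as used throughout these scripts):

    # Show the CRUSH hierarchy; each host should sit under a rack_* bucket
    # beneath root "default" once the rack-aware rule is in use.
    ceph --cluster ceph osd tree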