[Ceph OSD] Add OSD device class

This PS adds the possibility to override the device class through
a key in values.yaml. Motivation: in some cases the device driver
reports incorrect information about the device type, so the
automatic detection sets an incorrect device class.
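
As a minimal sketch of the intended usage (the overrides file name, the
`storage` sub-key and the `nvme` value are illustrative placeholders; the
actual key is the `device_class` entry added to values.yaml in this change,
shown in the last hunk below):

    # overrides.yaml (hypothetical) -- pin the CRUSH device class for all OSDs
    # deployed by the chart instead of relying on Ceph's autodetection
    conf:
      storage:              # assumed sub-section; see the values.yaml hunk below
        device_class: "nvme"

When the value is left empty, the scripts skip the override and keep whatever
class Ceph detects.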


Change-Id: I29eb2d5100f020a20f65686ef85c0975f909b39d
Kabanov, Dmitrii 2020-05-04 14:19:22 -07:00 committed by Dmitrii Kabanov
parent 9da1296667
commit 459d044040
5 changed files with 50 additions and 0 deletions

View File

@@ -27,6 +27,7 @@ set -ex
 eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
 eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
 eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))')
+eval DEVICE_CLASS=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["device_class"]))')
 if [[ $(ceph -v | egrep -q "nautilus|mimic|luminous"; echo $?) -ne 0 ]]; then
   echo "ERROR- need Luminous/Mimic/Nautilus release"
@@ -95,6 +96,7 @@ function crush_add_and_move {
 }
 function crush_location {
+  set_device_class
   if [ "x${CRUSH_FAILURE_DOMAIN_TYPE}" != "xhost" ]; then
     if [ "x${CRUSH_FAILURE_DOMAIN_NAME}" != "xfalse" ]; then
       crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_NAME}"
@@ -237,3 +239,18 @@ function udev_settle {
   done
 }
+function set_device_class {
+  if [ ! -z "$DEVICE_CLASS" ]; then
+    if [ "x$DEVICE_CLASS" != "x$(get_device_class)" ]; then
+      ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+        osd crush rm-device-class "osd.${OSD_ID}"
+      ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+        osd crush set-device-class "${DEVICE_CLASS}" "osd.${OSD_ID}"
+    fi
+  fi
+}
+function get_device_class {
+  echo $(ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    osd crush get-device-class "osd.${OSD_ID}")
+}
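
For a single OSD, the new helper pair boils down to the following manual
commands (a sketch; osd.3 and the nvme class are placeholders, and
ceph_cmd_retry is the chart's retrying wrapper around the ceph CLI, not shown
in this hunk):

    # class currently assigned to the OSD (autodetected or previously set)
    ceph osd crush get-device-class osd.3
    # Ceph refuses to overwrite an existing class, so remove it first
    ceph osd crush rm-device-class osd.3
    ceph osd crush set-device-class nvme osd.3

The class is only touched when DEVICE_CLASS is non-empty and differs from the
current value, so repeated runs of the init script are effectively idempotent.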

View File

@@ -177,6 +177,12 @@ function osd_disk_prepare {
   udev_settle
   ceph-disk -v prepare ${CLI_OPTS}
+  if [ ! -z "$DEVICE_CLASS" ]; then
+    local osd_id=$(cat "/var/lib/ceph/osd/*/whoami")
+    ceph osd crush rm-device-class osd."${osd_id}"
+    ceph osd crush set-device-class "${DEVICE_CLASS}" osd."${osd_id}"
+  fi
 }
 function osd_journal_create {

View File

@@ -27,6 +27,7 @@ set -ex
 eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
 eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
 eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))')
+eval DEVICE_CLASS=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["device_class"]))')
 if [[ $(ceph -v | egrep -q "nautilus|mimic|luminous"; echo $?) -ne 0 ]]; then
   echo "ERROR- need Luminous/Mimic/Nautilus release"
@@ -95,6 +96,7 @@ function crush_add_and_move {
 }
 function crush_location {
+  set_device_class
   if [ "x${CRUSH_FAILURE_DOMAIN_TYPE}" != "xhost" ]; then
     if [ "x${CRUSH_FAILURE_DOMAIN_NAME}" != "xfalse" ]; then
       crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_NAME}"
@@ -321,3 +323,19 @@ function get_osd_wal_device_from_device {
   # Use get_lvm_tag_from_device to get the OSD WAL device from the device
   get_lvm_tag_from_device ${device} ceph.wal_device
 }
+function set_device_class {
+  if [ ! -z "$DEVICE_CLASS" ]; then
+    if [ "x$DEVICE_CLASS" != "x$(get_device_class)" ]; then
+      ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+        osd crush rm-device-class "osd.${OSD_ID}"
+      ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+        osd crush set-device-class "${DEVICE_CLASS}" "osd.${OSD_ID}"
+    fi
+  fi
+}
+function get_device_class {
+  echo $(ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    osd crush get-device-class "osd.${OSD_ID}")
+}

View File

@@ -384,6 +384,11 @@ function osd_disk_prepare {
     CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}"
     udev_settle
   fi
+  if [ ! -z "$DEVICE_CLASS" ]; then
+    CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
+  fi
   if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
     CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
     ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
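
Here the class is not corrected after the fact; it is handed to ceph-volume at
prepare time via CLI_OPTS. Assuming a BlueStore prepare call (the exact
subcommand and the device path are not shown in this hunk and are placeholders
here), the assembled command looks roughly like:

    # prepare the OSD with an explicit CRUSH device class instead of the autodetected one
    ceph-volume lvm prepare --bluestore --data /dev/sdb --crush-device-class nvme

Because ceph-volume registers the OSD with the given class at creation time,
no separate rm-device-class/set-device-class pass is needed afterwards.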

View File

@@ -208,6 +208,10 @@ conf:
     failure_domain_by_hostname: "false"
     failure_domain_name: "false"
+    # Note: You can override the device class by adding the value (e.g., hdd, ssd or nvme).
+    # Leave it empty if you don't need to modify the device class.
+    device_class: ""
  # NOTE(portdirect): for homogeneous clusters the `osd` key can be used to
  # define OSD pods that will be deployed across the cluster.
  # when specifing whole disk (/dev/sdf) for journals, ceph-osd chart will create
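
After deploying with a non-empty device_class, the result can be checked from
any node with admin credentials (a sketch; the nvme value is a placeholder):

    # the CLASS column of the OSD tree should show the configured value
    ceph osd tree
    # class-specific shadow buckets (e.g. default~nvme) used by class-aware CRUSH rules
    ceph osd crush tree --show-shadow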