From 16b72c1e221157287270a8d233f82eb9019f7fac Mon Sep 17 00:00:00 2001 From: Stephen Taylor Date: Fri, 16 Oct 2020 18:11:33 +0000 Subject: [PATCH] [ceph-osd] Synchronization audit for the ceph-volume osd-init script There are race conditions in the ceph-volume osd-init script that occasionally cause deployment and OSD restart issues. This change attempts to resolve those and stabilize the script when multiple instances run simultaneously on the same host. It does so by running a number of previously unlocked LVM queries (pvdisplay, pvs, vgs, lvs, lvdisplay) through the existing locked helper, by replacing the three separately locked pvscan/vgscan/lvscan --cache calls in udev_settle with one locked call to a new lvm_scan function, and by redirecting the output of the flock commands in locked and global_locked to /dev/null. Change-Id: I79407059fa20fb51c6840717a083a8dc616ba410 --- ceph-osd/Chart.yaml | 2 +- .../bin/osd/ceph-volume/_common.sh.tpl | 32 ++++++++++++------- .../ceph-volume/_init-with-ceph-volume.sh.tpl | 16 +++++----- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/ceph-osd/Chart.yaml b/ceph-osd/Chart.yaml index fb625d388..0af580792 100644 --- a/ceph-osd/Chart.yaml +++ b/ceph-osd/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph OSD name: ceph-osd -version: 0.1.6 +version: 0.1.7 home: https://github.com/ceph/ceph ... 
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl index 7ee57a9b3..030e95091 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl @@ -76,15 +76,15 @@ function ceph_cmd_retry() { function locked() { exec {lock_fd}>/var/lib/ceph/tmp/init-osd.lock || exit 1 - flock -w 600 --verbose "${lock_fd}" + flock -w 600 --verbose "${lock_fd}" &> /dev/null "$@" - flock -u "${lock_fd}" + flock -u "${lock_fd}" &> /dev/null } function global_locked() { exec {global_lock_fd}>/var/lib/ceph/tmp/init-osd-global.lock || exit 1 - flock -w 600 --verbose "${global_lock_fd}" + flock -w 600 --verbose "${global_lock_fd}" &> /dev/null "$@" - flock -u "${global_lock_fd}" + flock -u "${global_lock_fd}" &> /dev/null } function crush_create_or_move { @@ -248,7 +248,7 @@ function disk_zap { locked lvremove -y ${logical_volume} fi done - local volume_group=$(pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph") + local volume_group=$(locked pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph") if [[ ${volume_group} ]]; then vgremove -y ${volume_group} pvremove -y ${device} @@ -260,13 +260,21 @@ function disk_zap { dd if=/dev/zero of=${device} bs=1M count=200 } +# This should be run atomically to prevent unexpected cache states +function lvm_scan { + pvscan --cache + vgscan --cache + lvscan --cache + pvscan + vgscan + lvscan +} + function udev_settle { osd_devices="${OSD_DEVICE}" udevadm settle --timeout=600 partprobe "${OSD_DEVICE}" - locked pvscan --cache - locked vgscan --cache - locked lvscan --cache + locked lvm_scan if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then if [ ! 
-z "$BLOCK_DB" ]; then osd_devices="${osd_devices}\|${BLOCK_DB}" @@ -353,7 +361,7 @@ function get_lv_size_from_device { device="$1" logical_volume="$(get_lv_from_device ${device})" - lvs ${logical_volume} -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1 + locked lvs ${logical_volume} -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1 } # Helper function to get the crush weight for an osd device @@ -427,12 +435,12 @@ function get_lvm_path_from_device { select="$1" options="--noheadings -o lv_dm_path" - pvs ${options} -S "${select}" | tr -d ' ' + locked pvs ${options} -S "${select}" | tr -d ' ' } function get_vg_name_from_device { device="$1" - pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') + pv_uuid=$(locked pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') if [[ "${pv_uuid}" ]]; then echo "ceph-vg-${pv_uuid}" @@ -442,7 +450,7 @@ function get_vg_name_from_device { function get_lv_name_from_device { device="$1" device_type="$2" - pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') + pv_uuid=$(locked pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') if [[ "${pv_uuid}" ]]; then echo "ceph-${device_type}-${pv_uuid}" diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl index deeec1005..91f60ce0b 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl @@ -67,7 +67,7 @@ function rename_lvs { local lv_tag=$(get_lvm_tag_from_device ${data_disk} ceph.db_uuid) if [[ "${lv_tag}" ]]; then - local lv_device=$(lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}') + local lv_device=$(locked lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}') if [[ "${lv_device}" ]]; then local db_vg=$(echo ${lv_device} | awk -F "/" '{print $3}') @@ -84,7 +84,7 @@ function rename_lvs { 
lv_tag=$(get_lvm_tag_from_device ${data_disk} ceph.wal_uuid) if [[ "${lv_tag}" ]]; then - local lv_device=$(lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}') + local lv_device=$(locked lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}') if [[ "${lv_device}" ]]; then local wal_vg=$(echo ${lv_device} | awk -F "/" '{print $3}') @@ -104,10 +104,10 @@ function rename_lvs { # renaming should be completed prior to calling this function update_lv_tags { local data_disk=$1 - local pv_uuid=$(pvdisplay -ddd -v ${data_disk} | awk '/PV UUID/{print $3}') + local pv_uuid=$(locked pvdisplay -ddd -v ${data_disk} | awk '/PV UUID/{print $3}') if [[ "${pv_uuid}" ]]; then - local volumes="$(lvs --no-headings | grep -e "${pv_uuid}")" + local volumes="$(locked lvs --no-headings | grep -e "${pv_uuid}")" local block_device db_device wal_device vg_name local old_block_device old_db_device old_wal_device @@ -188,7 +188,7 @@ function prep_device { udev_settle vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE}) lv_name=$(get_lv_name_from_device ${data_disk} ${device_type}) - VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]') + VG=$(locked vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]') if [[ $VG ]]; then DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}") CEPH_LVM_PREPARE=1 @@ -211,7 +211,7 @@ function prep_device { VG=$(get_vg_name_from_device ${BLOCK_DEVICE}) locked vgrename "ceph-vg-${random_uuid}" "${VG}" fi - logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]') + logical_volume=$(locked lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]') if [[ $logical_volume != "${lv_name}" ]]; then locked lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}" fi @@ -399,7 +399,7 @@ function osd_disk_prepare { OSD_VG=${vg_name} fi lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv) - if [[ ! 
"$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then + if [[ ! "$(locked lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG} fi OSD_LV=${OSD_VG}/${lv_name} @@ -424,7 +424,7 @@ function osd_disk_prepare { global_locked prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}" fi else - if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then + if locked pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then CEPH_LVM_PREPARE=0 fi fi