Merge "[CEPH] Cleanup the ceph-osd helm-chart"

This commit is contained in:
Zuul 2019-01-02 16:42:37 +00:00 committed by Gerrit Code Review
commit 8dba8cb648
7 changed files with 248 additions and 296 deletions

View File

@ -16,19 +16,12 @@ See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
*/}} */}}
source /tmp/osd-common.sh
set -ex set -ex
: "${OSD_BOOTSTRAP_KEYRING:=/var/lib/ceph/bootstrap-osd/${CLUSTER}.keyring}"
: "${OSD_JOURNAL_UUID:=$(uuidgen)}"
: "${CRUSH_LOCATION:=root=default host=${HOSTNAME}}"
: "${OSD_PATH_BASE:=/var/lib/ceph/osd/${CLUSTER}}"
: "${OSD_SOFT_FORCE_ZAP:=1}" : "${OSD_SOFT_FORCE_ZAP:=1}"
: "${OSD_JOURNAL_PARTITION:=}" : "${OSD_JOURNAL_DISK:=}"
eval OSD_PG_INTERVAL_FIX=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["osd_pg_interval_fix"]))')
eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))')
if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
export OSD_DEVICE="/var/lib/ceph/osd" export OSD_DEVICE="/var/lib/ceph/osd"
@ -42,80 +35,23 @@ else
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
fi fi
if [[ ! -e /etc/ceph/${CLUSTER}.conf ]]; then
echo "ERROR- /etc/ceph/${CLUSTER}.conf must exist; get it from your existing mon"
exit 1
fi
if [[ -z "${OSD_DEVICE}" ]];then if [[ -z "${OSD_DEVICE}" ]];then
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb" echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
exit 1 exit 1
fi fi
if [[ ! -b "${OSD_DEVICE}" ]]; then if [[ ! -b "${OSD_DEVICE}" ]]; then
echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !" echo "ERROR- The device pointed by OSD_DEVICE ${OSD_DEVICE} doesn't exist !"
exit 1 exit 1
fi fi
# Calculate proper device names, given a device and partition number
function dev_part {
local osd_device=${1}
local osd_partition=${2}
if [[ -L ${osd_device} ]]; then
# This device is a symlink. Work out it's actual device
local actual_device
actual_device=$(readlink -f "${osd_device}")
if [[ "${actual_device:0-1:1}" == [0-9] ]]; then
local desired_partition="${actual_device}p${osd_partition}"
else
local desired_partition="${actual_device}${osd_partition}"
fi
# Now search for a symlink in the directory of $osd_device
# that has the correct desired partition, and the longest
# shared prefix with the original symlink
local symdir
symdir=$(dirname "${osd_device}")
local link=""
local pfxlen=0
for option in ${symdir}/*; do
[[ -e $option ]] || break
if [[ $(readlink -f "$option") == "$desired_partition" ]]; then
local optprefixlen
optprefixlen=$(prefix_length "$option" "$osd_device")
if [[ $optprefixlen > $pfxlen ]]; then
link=$option
pfxlen=$optprefixlen
fi
fi
done
if [[ $pfxlen -eq 0 ]]; then
>&2 echo "Could not locate appropriate symlink for partition ${osd_partition} of ${osd_device}"
exit 1
fi
echo "$link"
elif [[ "${osd_device:0-1:1}" == [0-9] ]]; then
echo "${osd_device}p${osd_partition}"
else
echo "${osd_device}${osd_partition}"
fi
}
CEPH_DISK_OPTIONS="" CEPH_DISK_OPTIONS=""
CEPH_OSD_OPTIONS="" CEPH_OSD_OPTIONS=""
DATA_UUID=$(blkid -o value -s PARTUUID ${OSD_DEVICE}*1) DATA_UUID=$(blkid -o value -s PARTUUID ${OSD_DEVICE}*1)
LOCKBOX_UUID=$(blkid -o value -s PARTUUID ${OSD_DEVICE}3 || true)
JOURNAL_PART=$(dev_part ${OSD_DEVICE} 2)
# watch the udev event queue, and exit if all current events are handled # watch the udev event queue, and exit if all current events are handled
udevadm settle --timeout=600 udevadm settle --timeout=600
# Wait for a file to exist, regardless of the type
function wait_for_file {
timeout 10 bash -c "while [ ! -e ${1} ]; do echo 'Waiting for ${1} to show up' && sleep 1 ; done"
}
DATA_PART=$(dev_part ${OSD_DEVICE} 1) DATA_PART=$(dev_part ${OSD_DEVICE} 1)
MOUNTED_PART=${DATA_PART} MOUNTED_PART=${DATA_PART}
@ -131,85 +67,41 @@ OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
OSD_KEYRING="${OSD_PATH}/keyring" OSD_KEYRING="${OSD_PATH}/keyring"
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing # NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
OSD_WEIGHT=0 OSD_WEIGHT=0
function crush_create_or_move { # NOTE(supamatt): add or move the OSD's CRUSH location
local crush_location=${1} crush_location
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${crush_location} || true
}
function crush_add_and_move {
local crush_failure_domain_type=${1}
local crush_failure_domain_name=${2}
local crush_location=$(echo "root=default ${crush_failure_domain_type}=${crush_failure_domain_name} host=${HOSTNAME}")
crush_create_or_move "${crush_location}"
local crush_failure_domain_location_check=$(ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | grep "${crush_failure_domain_type}" | awk -F '"' '{print $4}')
if [ "x${crush_failure_domain_location_check}" != "x${crush_failure_domain_name}" ]; then
# NOTE(supamatt): Manually move the buckets for previously configured CRUSH configurations
# as create-or-move may not appropiately move them.
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush add-bucket "${crush_failure_domain_name}" "${crush_failure_domain_type}" || true
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush move "${crush_failure_domain_name}" root=default || true
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush move "${HOSTNAME}" "${crush_failure_domain_type}=${crush_failure_domain_name}" || true
fi
}
if [ "x${CRUSH_FAILURE_DOMAIN_TYPE}" != "xhost" ]; then
if [ "x${CRUSH_FAILURE_DOMAIN_NAME}" != "xfalse" ]; then
crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_NAME}"
elif [ "x${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}" != "xfalse" ]; then
crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "$(echo ${CRUSH_FAILURE_DOMAIN_TYPE}_$(echo ${HOSTNAME} | cut -c ${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}))"
else
# NOTE(supamatt): neither variables are defined then we fall back to expected default behavior
crush_create_or_move "${CRUSH_LOCATION}"
fi
else
crush_create_or_move "${CRUSH_LOCATION}"
fi
if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
if [ -n "${OSD_JOURNAL}" ]; then if [ -n "${OSD_JOURNAL}" ]; then
if [ -b "${OSD_JOURNAL}" ]; then if [ -b "${OSD_JOURNAL}" ]; then
OSD_JOURNAL_PARTITION="$(echo "${OSD_JOURNAL_PARTITION}" | sed 's/[^0-9]//g')" OSD_JOURNAL_DISK="$(readlink -f ${OSD_PATH}/journal)"
if [ -z "${OSD_JOURNAL_PARTITION}" ]; then if [ -z "${OSD_JOURNAL_DISK}" ]; then
# maybe they specified the journal as a /dev path like '/dev/sdc12': echo "ERROR: Unable to find journal device ${OSD_JOURNAL_DISK}"
JDEV="$(echo "${OSD_JOURNAL}" | sed 's/\(.*[^0-9]\)[0-9]*$/\1/')" exit 1
if [ -d "/sys/block/$(basename "${JDEV}")/$(basename "${OSD_JOURNAL}")" ]; then
OSD_JOURNAL="$(dev_part "${JDEV}" "$(echo "${OSD_JOURNAL}" | sed 's/.*[^0-9]\([0-9]*\)$/\1/')")"
elif [ "${OSD_DEVICE}" == "${OSD_JOURNAL}" ]; then
# journal and osd disk are on the same device.
OSD_JOURNAL="$(dev_part "${OSD_JOURNAL}" 2)"
else
# they likely supplied a bare device and prepare created partition 1.
OSD_JOURNAL="$(dev_part "${OSD_JOURNAL}" 1)"
fi
else else
OSD_JOURNAL="$(dev_part "${OSD_JOURNAL}" "${OSD_JOURNAL_PARTITION}")" OSD_JOURNAL="${OSD_JOURNAL_DISK}"
fi fi
fi fi
if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then
OSD_JOURNAL="${OSD_JOURNAL}/journal.${OSD_ID}" OSD_JOURNAL="${OSD_JOURNAL}/journal.${OSD_ID}"
wait_for_file "${OSD_JOURNAL}"
chown ceph. "${OSD_JOURNAL}"
else else
if [ ! -b "${OSD_JOURNAL}" ]; then if [ ! -b "${OSD_JOURNAL}" ]; then
echo "ERROR: Unable to find journal device ${OSD_JOURNAL}" echo "ERROR: Unable to find journal device ${OSD_JOURNAL}"
exit 1 exit 1
else else
wait_for_file "${OSD_JOURNAL}" chown ceph. "${OSD_JOURNAL}"
chown ceph. "${OSD_JOURNAL}" "${DATA_PART}"
fi fi
fi fi
else else
wait_for_file "${JOURNAL_PART}" wait_for_file "${OSD_JOURNAL}"
chown ceph. "${JOURNAL_PART}" "${DATA_PART}" chown ceph. "${OSD_JOURNAL}"
OSD_JOURNAL="${JOURNAL_PART}"
fi fi
fi fi
if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
# NOTE(supamatt): https://tracker.ceph.com/issues/21142 is impacting us due to the older Ceph version 12.2.3 that we are running # NOTE(supamatt): This function is a workaround to Ceph upstream bug #21142
if [ "x${OSD_PG_INTERVAL_FIX}" == "xtrue" ]; then osd_pg_interval_fix
for PG in $(ls ${OSD_PATH}/current | awk -F'_' '/head/{print $1}'); do
ceph-objectstore-tool --data-path ${OSD_PATH} --op rm-past-intervals --pgid ${PG};
done
fi
fi fi
if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then

View File

@ -0,0 +1,159 @@
#!/bin/bash
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
set -ex
: "${CRUSH_LOCATION:=root=default host=${HOSTNAME}}"
: "${OSD_PATH_BASE:=/var/lib/ceph/osd/${CLUSTER}}"
: "${CEPH_CONF:="/etc/ceph/${CLUSTER}.conf"}"
: "${OSD_BOOTSTRAP_KEYRING:=/var/lib/ceph/bootstrap-osd/${CLUSTER}.keyring}"
: "${OSD_JOURNAL_UUID:=$(uuidgen)}"
eval OSD_PG_INTERVAL_FIX=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["osd_pg_interval_fix"]))')
eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))')
if [[ $(ceph -v | egrep -q "12.2|luminous"; echo $?) -ne 0 ]]; then
echo "ERROR- need Luminous release"
exit 1
fi
if [ -z "${HOSTNAME}" ]; then
echo "HOSTNAME not set; This will prevent to add an OSD into the CRUSH map"
exit 1
fi
if [[ ! -e ${CEPH_CONF}.template ]]; then
echo "ERROR- ${CEPH_CONF}.template must exist; get it from your existing mon"
exit 1
else
ENDPOINT=$(kubectl get endpoints ceph-mon -n ${NAMESPACE} -o json | awk -F'"' -v port=${MON_PORT} '/ip/{print $4":"port}' | paste -sd',')
if [[ ${ENDPOINT} == "" ]]; then
# No endpoints are available, just copy ceph.conf as-is
/bin/sh -c -e "cat ${CEPH_CONF}.template | tee ${CEPH_CONF}" || true
else
/bin/sh -c -e "cat ${CEPH_CONF}.template | sed 's/mon_host.*/mon_host = ${ENDPOINT}/g' | tee ${CEPH_CONF}" || true
fi
fi
# Wait for a file to exist, regardless of the type
function wait_for_file {
timeout 10 bash -c "while [ ! -e ${1} ]; do echo 'Waiting for ${1} to show up' && sleep 1 ; done"
}
function is_available {
command -v $@ &>/dev/null
}
function crush_create_or_move {
local crush_location=${1}
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${crush_location} || true
}
function crush_add_and_move {
local crush_failure_domain_type=${1}
local crush_failure_domain_name=${2}
local crush_location=$(echo "root=default ${crush_failure_domain_type}=${crush_failure_domain_name} host=${HOSTNAME}")
crush_create_or_move "${crush_location}"
local crush_failure_domain_location_check=$(ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | grep "${crush_failure_domain_type}" | awk -F '"' '{print $4}')
if [ "x${crush_failure_domain_location_check}" != "x${crush_failure_domain_name}" ]; then
# NOTE(supamatt): Manually move the buckets for previously configured CRUSH configurations
# as create-or-move may not appropiately move them.
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush add-bucket "${crush_failure_domain_name}" "${crush_failure_domain_type}" || true
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush move "${crush_failure_domain_name}" root=default || true
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush move "${HOSTNAME}" "${crush_failure_domain_type}=${crush_failure_domain_name}" || true
fi
}
function crush_location {
if [ "x${CRUSH_FAILURE_DOMAIN_TYPE}" != "xhost" ]; then
if [ "x${CRUSH_FAILURE_DOMAIN_NAME}" != "xfalse" ]; then
crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_NAME}"
elif [ "x${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}" != "xfalse" ]; then
crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "$(echo ${CRUSH_FAILURE_DOMAIN_TYPE}_$(echo ${HOSTNAME} | cut -c ${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}))"
else
# NOTE(supamatt): neither variables are defined then we fall back to default behavior
crush_create_or_move "${CRUSH_LOCATION}"
fi
else
crush_create_or_move "${CRUSH_LOCATION}"
fi
}
# Calculate proper device names, given a device and partition number
function dev_part {
local osd_device=${1}
local osd_partition=${2}
if [[ -L ${osd_device} ]]; then
# This device is a symlink. Work out it's actual device
local actual_device=$(readlink -f "${osd_device}")
local bn=$(basename "${osd_device}")
if [[ "${actual_device:0-1:1}" == [0-9] ]]; then
local desired_partition="${actual_device}p${osd_partition}"
else
local desired_partition="${actual_device}${osd_partition}"
fi
# Now search for a symlink in the directory of $osd_device
# that has the correct desired partition, and the longest
# shared prefix with the original symlink
local symdir=$(dirname "${osd_device}")
local link=""
local pfxlen=0
for option in ${symdir}/*; do
[[ -e $option ]] || break
if [[ $(readlink -f "${option}") == "${desired_partition}" ]]; then
local optprefixlen=$(prefix_length "${option}" "${bn}")
if [[ ${optprefixlen} > ${pfxlen} ]]; then
link=${symdir}/${option}
pfxlen=${optprefixlen}
fi
fi
done
if [[ $pfxlen -eq 0 ]]; then
>&2 echo "Could not locate appropriate symlink for partition ${osd_partition} of ${osd_device}"
exit 1
fi
echo "$link"
elif [[ "${osd_device:0-1:1}" == [0-9] ]]; then
echo "${osd_device}p${osd_partition}"
else
echo "${osd_device}${osd_partition}"
fi
}
function osd_pg_interval_fix {
# NOTE(supamatt): https://tracker.ceph.com/issues/21142 is impacting us due to the older Ceph version 12.2.3 that we are running
if [ "x${OSD_PG_INTERVAL_FIX}" == "xtrue" ]; then
for PG in $(ls ${OSD_PATH}/current | awk -F'_' '/head/{print $1}'); do
ceph-objectstore-tool --data-path ${OSD_PATH} --op rm-past-intervals --pgid ${PG};
done
fi
}
function udev_settle {
partprobe "${OSD_DEVICE}"
# watch the udev event queue, and exit if all current events are handled
udevadm settle --timeout=600
}

View File

@ -1,32 +1,27 @@
#!/bin/bash #!/bin/bash
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
set -ex set -ex
export LC_ALL=C export LC_ALL=C
: "${HOSTNAME:=$(uname -n)}"
: "${CRUSH_LOCATION:=root=default host=${HOSTNAME}}" source /tmp/osd-common.sh
: "${OSD_PATH_BASE:=/var/lib/ceph/osd/${CLUSTER}}"
: "${JOURNAL_DIR:=/var/lib/ceph/journal}" : "${JOURNAL_DIR:=/var/lib/ceph/journal}"
: "${OSD_BOOTSTRAP_KEYRING:=/var/lib/ceph/bootstrap-osd/${CLUSTER}.keyring}"
eval OSD_PG_INTERVAL_FIX=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["osd_pg_interval_fix"]))')
eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))')
function is_available {
command -v $@ &>/dev/null
}
if is_available rpm; then
OS_VENDOR=redhat
source /etc/sysconfig/ceph
elif is_available dpkg; then
OS_VENDOR=ubuntu
source /etc/default/ceph
fi
if [[ $(ceph -v | egrep -q "12.2|luminous"; echo $?) -ne 0 ]]; then
echo "ERROR- need Luminous release"
exit 1
fi
if [[ ! -d /var/lib/ceph/osd ]]; then if [[ ! -d /var/lib/ceph/osd ]]; then
echo "ERROR- could not find the osd directory, did you bind mount the OSD data directory?" echo "ERROR- could not find the osd directory, did you bind mount the OSD data directory?"
@ -34,11 +29,6 @@ if [[ ! -d /var/lib/ceph/osd ]]; then
exit 1 exit 1
fi fi
if [ -z "${HOSTNAME}" ]; then
echo "HOSTNAME not set; This will prevent to add an OSD into the CRUSH map"
exit 1
fi
# check if anything is present, if not, create an osd and its directory # check if anything is present, if not, create an osd and its directory
if [[ -n "$(find /var/lib/ceph/osd -prune -empty)" ]]; then if [[ -n "$(find /var/lib/ceph/osd -prune -empty)" ]]; then
echo "Creating osd" echo "Creating osd"
@ -56,14 +46,14 @@ if [[ -n "$(find /var/lib/ceph/osd -prune -empty)" ]]; then
OSD_PATH="$OSD_PATH_BASE-$OSD_ID/" OSD_PATH="$OSD_PATH_BASE-$OSD_ID/"
if [ -n "${JOURNAL_DIR}" ]; then if [ -n "${JOURNAL_DIR}" ]; then
OSD_J="${JOURNAL_DIR}/journal.${OSD_ID}" OSD_JOURNAL="${JOURNAL_DIR}/journal.${OSD_ID}"
chown -R ceph. ${JOURNAL_DIR} chown -R ceph. ${JOURNAL_DIR}
else else
if [ -n "${JOURNAL}" ]; then if [ -n "${JOURNAL}" ]; then
OSD_J=${JOURNAL} OSD_JOURNAL=${JOURNAL}
chown -R ceph. $(dirname ${JOURNAL_DIR}) chown -R ceph. $(dirname ${JOURNAL_DIR})
else else
OSD_J=${OSD_PATH%/}/journal OSD_JOURNAL=${OSD_PATH%/}/journal
fi fi
fi fi
# create the folder and own it # create the folder and own it
@ -74,57 +64,21 @@ if [[ -n "$(find /var/lib/ceph/osd -prune -empty)" ]]; then
ceph-authtool --create-keyring ${OSD_PATH%/}/keyring --name osd.${OSD_ID} --add-key ${OSD_SECRET} ceph-authtool --create-keyring ${OSD_PATH%/}/keyring --name osd.${OSD_ID} --add-key ${OSD_SECRET}
OSD_KEYRING="${OSD_PATH%/}/keyring" OSD_KEYRING="${OSD_PATH%/}/keyring"
# init data directory # init data directory
ceph-osd -i ${OSD_ID} --mkfs --osd-uuid ${UUID} --mkjournal --osd-journal ${OSD_J} --setuser ceph --setgroup ceph ceph-osd -i ${OSD_ID} --mkfs --osd-uuid ${UUID} --mkjournal --osd-journal ${OSD_JOURNAL} --setuser ceph --setgroup ceph
# add the osd to the crush map # add the osd to the crush map
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing # NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
OSD_WEIGHT=0 OSD_WEIGHT=0
function crush_create_or_move { # NOTE(supamatt): add or move the OSD's CRUSH location
local crush_location=${1} crush_location
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${crush_location} || true
}
function crush_add_and_move {
local crush_failure_domain_type=${1}
local crush_failure_domain_name=${2}
local crush_location=$(echo "root=default ${crush_failure_domain_type}=${crush_failure_domain_name} host=${HOSTNAME}")
crush_create_or_move "${crush_location}"
local crush_failure_domain_location_check=$(ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | grep "${crush_failure_domain_type}" | awk -F '"' '{print $4}')
if [ "x${crush_failure_domain_location_check}" != "x${crush_failure_domain_name}" ]; then
# NOTE(supamatt): Manually move the buckets for previously configured CRUSH configurations
# as create-or-move may not appropiately move them.
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush add-bucket "${crush_failure_domain_name}" "${crush_failure_domain_type}" || true
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush move "${crush_failure_domain_name}" root=default || true
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
osd crush move "${HOSTNAME}" "${crush_failure_domain_type}=${crush_failure_domain_name}" || true
fi
}
if [ "x${CRUSH_FAILURE_DOMAIN_TYPE}" != "xhost" ]; then
if [ "x${CRUSH_FAILURE_DOMAIN_NAME}" != "xfalse" ]; then
crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_NAME}"
elif [ "x${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}" != "xfalse" ]; then
crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "$(echo ${CRUSH_FAILURE_DOMAIN_TYPE}_$(echo ${HOSTNAME} | cut -c ${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}))"
else
# NOTE(supamatt): neither variables are defined then we fall back to default behavior
crush_create_or_move "${CRUSH_LOCATION}"
fi
else
crush_create_or_move "${CRUSH_LOCATION}"
fi
fi fi
# NOTE(supamatt): This function is a workaround to Ceph upstream bug #21142
osd_pg_interval_fix
# create the directory and an empty Procfile # create the directory and an empty Procfile
mkdir -p /etc/forego/"${CLUSTER}" mkdir -p /etc/forego/"${CLUSTER}"
echo "" > /etc/forego/"${CLUSTER}"/Procfile echo "" > /etc/forego/"${CLUSTER}"/Procfile
# NOTE(supamatt): https://tracker.ceph.com/issues/21142 is impacting us due to the older Ceph version 12.2.3 that we are running
if [ "x${OSD_PG_INTERVAL_FIX}" == "xtrue" ]; then
for PG in $(ls ${OSD_PATH}/current | awk -F'_' '/head/{print $1}'); do
ceph-objectstore-tool --data-path ${OSD_PATH} --op rm-past-intervals --pgid ${PG};
done
fi
for OSD_ID in $(ls /var/lib/ceph/osd | sed 's/.*-//'); do for OSD_ID in $(ls /var/lib/ceph/osd | sed 's/.*-//'); do
OSD_PATH="$OSD_PATH_BASE-$OSD_ID/" OSD_PATH="$OSD_PATH_BASE-$OSD_ID/"
OSD_KEYRING="${OSD_PATH%/}/keyring" OSD_KEYRING="${OSD_PATH%/}/keyring"
@ -133,16 +87,16 @@ for OSD_ID in $(ls /var/lib/ceph/osd | sed 's/.*-//'); do
chown -R ceph. ${JOURNAL_DIR} chown -R ceph. ${JOURNAL_DIR}
else else
if [ -n "${JOURNAL}" ]; then if [ -n "${JOURNAL}" ]; then
OSD_J=${JOURNAL} OSD_JOURNAL=${JOURNAL}
chown -R ceph. $(dirname ${JOURNAL_DIR}) chown -R ceph. $(dirname ${JOURNAL_DIR})
else else
OSD_J=${OSD_PATH%/}/journal OSD_JOURNAL=${OSD_PATH%/}/journal
fi fi
fi fi
# log osd filesystem type # log osd filesystem type
FS_TYPE=`stat --file-system -c "%T" ${OSD_PATH}` FS_TYPE=`stat --file-system -c "%T" ${OSD_PATH}`
echo "OSD $OSD_PATH filesystem type: $FS_TYPE" echo "OSD $OSD_PATH filesystem type: $FS_TYPE"
echo "${CLUSTER}-${OSD_ID}: /usr/bin/ceph-osd --cluster ${CLUSTER} -f -i ${OSD_ID} --osd-journal ${OSD_J} -k $OSD_KEYRING" | tee -a /etc/forego/"${CLUSTER}"/Procfile echo "${CLUSTER}-${OSD_ID}: /usr/bin/ceph-osd --cluster ${CLUSTER} -f -i ${OSD_ID} --osd-journal ${OSD_JOURNAL} -k $OSD_KEYRING" | tee -a /etc/forego/"${CLUSTER}"/Procfile
done done
exec /usr/local/bin/forego start -f /etc/forego/"${CLUSTER}"/Procfile exec /usr/local/bin/forego start -f /etc/forego/"${CLUSTER}"/Procfile

View File

@ -18,26 +18,12 @@ limitations under the License.
set -ex set -ex
: "${OSD_BOOTSTRAP_KEYRING:=/var/lib/ceph/bootstrap-osd/${CLUSTER}.keyring}" source /tmp/osd-common.sh
: "${OSD_JOURNAL_UUID:=$(uuidgen)}"
: "${OSD_FORCE_ZAP:=1}" : "${OSD_FORCE_ZAP:=1}"
: "${CEPH_CONF:="/etc/ceph/${CLUSTER}.conf"}"
# We do not want to zap journal disk. Tracking this option seperatly. # We do not want to zap journal disk. Tracking this option seperatly.
: "${JOURNAL_FORCE_ZAP:=0}" : "${JOURNAL_FORCE_ZAP:=0}"
if [[ ! -e ${CEPH_CONF}.template ]]; then
echo "ERROR- ${CEPH_CONF}.template must exist; get it from your existing mon"
exit 1
else
ENDPOINT=$(kubectl get endpoints ceph-mon -n ${NAMESPACE} -o json | awk -F'"' -v port=${MON_PORT} '/ip/{print $4":"port}' | paste -sd',')
if [[ ${ENDPOINT} == "" ]]; then
# No endpoints are available, just copy ceph.conf as-is
/bin/sh -c -e "cat ${CEPH_CONF}.template | tee ${CEPH_CONF}" || true
else
/bin/sh -c -e "cat ${CEPH_CONF}.template | sed 's/mon_host.*/mon_host = ${ENDPOINT}/g' | tee ${CEPH_CONF}" || true
fi
fi
if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
export OSD_DEVICE="/var/lib/ceph/osd" export OSD_DEVICE="/var/lib/ceph/osd"
else else
@ -50,54 +36,6 @@ else
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
fi fi
function udev_settle {
partprobe "${OSD_DEVICE}"
# watch the udev event queue, and exit if all current events are handled
udevadm settle --timeout=600
}
# Calculate proper device names, given a device and partition number
function dev_part {
local OSD_DEVICE=${1}
local OSD_PARTITION=${2}
if [[ -L ${OSD_DEVICE} ]]; then
# This device is a symlink. Work out it's actual device
local ACTUAL_DEVICE=$(readlink -f ${OSD_DEVICE})
local BN=$(basename ${OSD_DEVICE})
if [[ "${ACTUAL_DEVICE:0-1:1}" == [0-9] ]]; then
local DESIRED_PARTITION="${ACTUAL_DEVICE}p${OSD_PARTITION}"
else
local DESIRED_PARTITION="${ACTUAL_DEVICE}${OSD_PARTITION}"
fi
# Now search for a symlink in the directory of $OSD_DEVICE
# that has the correct desired partition, and the longest
# shared prefix with the original symlink
local SYMDIR=$(dirname ${OSD_DEVICE})
local LINK=""
local PFXLEN=0
for OPTION in $(ls $SYMDIR); do
if [[ $(readlink -f $SYMDIR/$OPTION) == $DESIRED_PARTITION ]]; then
local OPT_PREFIX_LEN=$(prefix_length $OPTION $BN)
if [[ $OPT_PREFIX_LEN > $PFXLEN ]]; then
LINK=$SYMDIR/$OPTION
PFXLEN=$OPT_PREFIX_LEN
fi
fi
done
if [[ $PFXLEN -eq 0 ]]; then
>&2 log "Could not locate appropriate symlink for partition ${OSD_PARTITION} of ${OSD_DEVICE}"
exit 1
fi
echo "$LINK"
elif [[ "${OSD_DEVICE:0-1:1}" == [0-9] ]]; then
echo "${OSD_DEVICE}p${OSD_PARTITION}"
else
echo "${OSD_DEVICE}${OSD_PARTITION}"
fi
}
function osd_disk_prepare { function osd_disk_prepare {
if [[ -z "${OSD_DEVICE}" ]];then if [[ -z "${OSD_DEVICE}" ]];then
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb" echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
@ -119,7 +57,7 @@ function osd_disk_prepare {
if ! parted --script ${OSD_DEVICE} print > /dev/null 2>&1; then if ! parted --script ${OSD_DEVICE} print > /dev/null 2>&1; then
if [[ ${OSD_FORCE_ZAP} -eq 1 ]]; then if [[ ${OSD_FORCE_ZAP} -eq 1 ]]; then
echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_ZAP is enabled so we are zapping the device anyway" echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_ZAP is enabled so we are zapping the device anyway"
ceph-disk -v zap ${OSD_DEVICE} sgdisk -Z ${OSD_DEVICE}
else else
echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird." echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird."
echo "It would be too dangerous to destroy it without any notification." echo "It would be too dangerous to destroy it without any notification."
@ -134,18 +72,18 @@ function osd_disk_prepare {
if [[ "$(parted --script ${OSD_DEVICE} print | egrep '^ 1.*ceph data')" ]]; then if [[ "$(parted --script ${OSD_DEVICE} print | egrep '^ 1.*ceph data')" ]]; then
if [[ ${OSD_FORCE_ZAP} -eq 1 ]]; then if [[ ${OSD_FORCE_ZAP} -eq 1 ]]; then
if [ -b "${OSD_DEVICE}1" ]; then if [ -b "${OSD_DEVICE}1" ]; then
local cephFSID=`ceph-conf --lookup fsid` local cephFSID=$(ceph-conf --lookup fsid)
if [ ! -z "${cephFSID}" ]; then if [ ! -z "${cephFSID}" ]; then
local tmpmnt=`mktemp -d` local tmpmnt=$(mktemp -d)
mount ${OSD_DEVICE}1 ${tmpmnt} mount ${OSD_DEVICE}1 ${tmpmnt}
if [ -f "${tmpmnt}/ceph_fsid" ]; then if [ -f "${tmpmnt}/ceph_fsid" ]; then
osdFSID=`cat "${tmpmnt}/ceph_fsid"` osdFSID=$(cat "${tmpmnt}/ceph_fsid")
umount ${tmpmnt} umount ${tmpmnt}
if [ ${osdFSID} != ${cephFSID} ]; then if [ ${osdFSID} != ${cephFSID} ]; then
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster." echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}" echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
echo "Because OSD_FORCE_ZAP was set, we will zap this device." echo "Because OSD_FORCE_ZAP was set, we will zap this device."
ceph-disk -v zap ${OSD_DEVICE} sgdisk -Z ${OSD_DEVICE}
else else
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster." echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
echo "OSD_FORCE_ZAP is set, but will be ignored and the device will not be zapped." echo "OSD_FORCE_ZAP is set, but will be ignored and the device will not be zapped."
@ -156,7 +94,7 @@ function osd_disk_prepare {
umount ${tmpmnt} umount ${tmpmnt}
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID." echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
echo "Because OSD_FORCE_ZAP was set, we will zap this device." echo "Because OSD_FORCE_ZAP was set, we will zap this device."
ceph-disk -v zap ${OSD_DEVICE} sgdisk -Z ${OSD_DEVICE}
fi fi
else else
echo "Unable to determine the FSID of the current cluster." echo "Unable to determine the FSID of the current cluster."
@ -182,12 +120,12 @@ function osd_disk_prepare {
# we only care about journals for filestore. # we only care about journals for filestore.
if [ -n "${OSD_JOURNAL}" ]; then if [ -n "${OSD_JOURNAL}" ]; then
if [ -b $OSD_JOURNAL ]; then if [ -b $OSD_JOURNAL ]; then
OSD_JOURNAL=`readlink -f ${OSD_JOURNAL}` OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
OSD_JOURNAL_PARTITION=`echo $OSD_JOURNAL_PARTITION | sed 's/[^0-9]//g'` OSD_JOURNAL_PARTITION=$(echo $OSD_JOURNAL_PARTITION | sed 's/[^0-9]//g')
if [ -z "${OSD_JOURNAL_PARTITION}" ]; then if [ -z "${OSD_JOURNAL_PARTITION}" ]; then
# maybe they specified the journal as a /dev path like '/dev/sdc12': # maybe they specified the journal as a /dev path like '/dev/sdc12':
local JDEV=`echo ${OSD_JOURNAL} | sed 's/\(.*[^0-9]\)[0-9]*$/\1/'` local JDEV=$(echo ${OSD_JOURNAL} | sed 's/\(.*[^0-9]\)[0-9]*$/\1/')
if [ -d /sys/block/`basename $JDEV`/`basename $OSD_JOURNAL` ]; then if [ -d /sys/block/$(basename ${JDEV})/$(basename ${OSD_JOURNAL}) ]; then
OSD_JOURNAL=$(dev_part ${JDEV} `echo ${OSD_JOURNAL} |\ OSD_JOURNAL=$(dev_part ${JDEV} `echo ${OSD_JOURNAL} |\
sed 's/.*[^0-9]\([0-9]*\)$/\1/'`) sed 's/.*[^0-9]\([0-9]*\)$/\1/'`)
OSD_JOURNAL_PARTITION=${JDEV} OSD_JOURNAL_PARTITION=${JDEV}
@ -198,8 +136,8 @@ function osd_disk_prepare {
fi fi
chown ceph. ${OSD_JOURNAL} chown ceph. ${OSD_JOURNAL}
else else
echo "No journal device specified. OSD and journal will share ${OSD_DEVICE}" echo "No journal device specified. OSD and journal will share ${OSD_DEVICE}"
echo "For better performance, consider moving your journal to a separate device" echo "For better performance on HDD, consider moving your journal to a separate device"
fi fi
CLI_OPTS="${CLI_OPTS} --filestore" CLI_OPTS="${CLI_OPTS} --filestore"
else else
@ -212,12 +150,12 @@ function osd_disk_prepare {
echo "OSD_FORCE_ZAP is set, so we will erase the journal device ${OSD_JOURNAL}" echo "OSD_FORCE_ZAP is set, so we will erase the journal device ${OSD_JOURNAL}"
if [ -z "${OSD_JOURNAL_PARTITION}" ]; then if [ -z "${OSD_JOURNAL_PARTITION}" ]; then
# it's a raw block device. nuke any existing partition table. # it's a raw block device. nuke any existing partition table.
parted -s ${OSD_JOURNAL} mklabel msdos sgdisk -Z ${OSD_JOURNAL}
else else
# we are likely working on a partition. Just make a filesystem on # we are likely working on a partition. Just make a filesystem on
# the device, as other partitions may be in use so nuking the whole # the device, as other partitions may be in use so nuking the whole
# disk isn't safe. # disk isn't safe.
mkfs -t xfs -f ${OSD_JOURNAL} wipefs ${OSD_JOURNAL}
fi fi
fi fi

View File

@ -17,19 +17,6 @@ limitations under the License.
*/}} */}}
set -ex set -ex
: "${CEPH_CONF:="/etc/ceph/${CLUSTER}.conf"}"
if [[ ! -e ${CEPH_CONF}.template ]]; then
echo "ERROR- ${CEPH_CONF}.template must exist; get it from your existing mon"
exit 1
else
ENDPOINT=$(kubectl get endpoints ceph-mon -n ${NAMESPACE} -o json | awk -F'"' -v port=${MON_PORT} '/ip/{print $4":"port}' | paste -sd',')
if [[ ${ENDPOINT} == "" ]]; then
/bin/sh -c -e "cat ${CEPH_CONF}.template | tee ${CEPH_CONF}" || true
else
/bin/sh -c -e "cat ${CEPH_CONF}.template | sed 's/mon_host.*/mon_host = ${ENDPOINT}/g' | tee ${CEPH_CONF}" || true
fi
fi
echo "LAUNCHING OSD: in ${STORAGE_TYPE%-*}:${STORAGE_TYPE#*-} mode" echo "LAUNCHING OSD: in ${STORAGE_TYPE%-*}:${STORAGE_TYPE#*-} mode"
exec "/tmp/osd-${STORAGE_TYPE%-*}.sh" exec "/tmp/osd-${STORAGE_TYPE%-*}.sh"

View File

@ -38,6 +38,8 @@ data:
{{ tuple "bin/osd/_check.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} {{ tuple "bin/osd/_check.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-stop.sh: | osd-stop.sh: |
{{ tuple "bin/osd/_stop.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} {{ tuple "bin/osd/_stop.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-common.sh: |
{{ tuple "bin/osd/_common.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
init-dirs.sh: | init-dirs.sh: |
{{ tuple "bin/_init-dirs.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} {{ tuple "bin/_init-dirs.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
helm-tests.sh: | helm-tests.sh: |

View File

@ -76,6 +76,14 @@ spec:
mountPath: /tmp/init-dirs.sh mountPath: /tmp/init-dirs.sh
subPath: init-dirs.sh subPath: init-dirs.sh
readOnly: true readOnly: true
- name: ceph-osd-bin
mountPath: /tmp/osd-common.sh
subPath: osd-common.sh
readOnly: true
- name: ceph-osd-etc
mountPath: /etc/ceph/storage.json
subPath: storage.json
readOnly: true
- name: pod-var-lib-ceph - name: pod-var-lib-ceph
mountPath: /var/lib/ceph mountPath: /var/lib/ceph
readOnly: false readOnly: false
@ -149,10 +157,18 @@ spec:
mountPath: /tmp/osd-init.sh mountPath: /tmp/osd-init.sh
subPath: osd-init.sh subPath: osd-init.sh
readOnly: true readOnly: true
- name: ceph-osd-bin
mountPath: /tmp/osd-common.sh
subPath: osd-common.sh
readOnly: true
- name: ceph-osd-etc - name: ceph-osd-etc
mountPath: /etc/ceph/ceph.conf.template mountPath: /etc/ceph/ceph.conf.template
subPath: ceph.conf subPath: ceph.conf
readOnly: true readOnly: true
- name: ceph-osd-etc
mountPath: /etc/ceph/storage.json
subPath: storage.json
readOnly: true
- name: ceph-bootstrap-osd-keyring - name: ceph-bootstrap-osd-keyring
mountPath: /var/lib/ceph/bootstrap-osd/ceph.keyring mountPath: /var/lib/ceph/bootstrap-osd/ceph.keyring
subPath: ceph.keyring subPath: ceph.keyring
@ -252,6 +268,10 @@ spec:
mountPath: /tmp/utils-checkDNS.sh mountPath: /tmp/utils-checkDNS.sh
subPath: utils-checkDNS.sh subPath: utils-checkDNS.sh
readOnly: true readOnly: true
- name: ceph-osd-bin
mountPath: /tmp/osd-common.sh
subPath: osd-common.sh
readOnly: true
- name: ceph-osd-etc - name: ceph-osd-etc
mountPath: /etc/ceph/storage.json mountPath: /etc/ceph/storage.json
subPath: storage.json subPath: storage.json