[CEPH] Support a troubleshooting option to reset PG metadata

Ceph upstream bug https://tracker.ceph.com/issues/21142 is
impacting the availability of our sites in the pipeline. Add an option
to reset the past interval metadata time on an OSD's PGs to work around
this issue if it occurs.

Change-Id: I1fe0bee6ce8aa402c241f1ad457bbf532945a530
This commit is contained in:
Matthew Heler 2018-12-18 14:32:44 -06:00
parent 4233c25308
commit e1a3819a0d
3 changed files with 23 additions and 0 deletions

View File

@ -25,6 +25,7 @@ set -ex
: "${OSD_SOFT_FORCE_ZAP:=1}" : "${OSD_SOFT_FORCE_ZAP:=1}"
: "${OSD_JOURNAL_PARTITION:=}" : "${OSD_JOURNAL_PARTITION:=}"
# Load the "osd_pg_interval_fix" key of /etc/ceph/storage.json into
# OSD_PG_INTERVAL_FIX. Python prints the value as a JSON literal (json.dumps)
# and eval lets the shell parse that literal, so a JSON string ends up in the
# variable without its surrounding double quotes.
# NOTE(review): eval runs the printed text as shell code; this presumably
# relies on storage.json being trusted, chart-generated content - confirm.
eval OSD_PG_INTERVAL_FIX=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["osd_pg_interval_fix"]))')
eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))') eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))') eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))') eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))')
@ -202,6 +203,15 @@ if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
fi fi
fi fi
if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
  # NOTE(supamatt): https://tracker.ceph.com/issues/21142 is impacting us due
  # to the older Ceph version 12.2.3 that we are running.
  # When OSD_PG_INTERVAL_FIX is "true", run rm-past-intervals for every entry
  # under ${OSD_PATH}/current whose name contains "head".
  if [ "x${OSD_PG_INTERVAL_FIX}" == "xtrue" ]; then
    # Use a quoted glob instead of parsing `ls`, so an OSD path containing
    # whitespace or glob characters is not word-split or re-expanded.
    for PG_DIR in "${OSD_PATH}"/current/*head*; do
      # An unmatched glob is left as the literal pattern; skip it.
      [ -e "${PG_DIR}" ] || [ -L "${PG_DIR}" ] || continue
      # The PG id is the entry name up to its first "_"
      # (e.g. "1.0_head" -> "1.0").
      PG="${PG_DIR##*/}"
      PG="${PG%%_*}"
      # Names starting with "_" yield an empty id; nothing to reset for them.
      [ -n "${PG}" ] || continue
      ceph-objectstore-tool --data-path "${OSD_PATH}" --op rm-past-intervals --pgid "${PG}"
    done
  fi
fi
if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then
touch ${OSD_JOURNAL} touch ${OSD_JOURNAL}
chown -R ceph. /var/lib/ceph/journal chown -R ceph. /var/lib/ceph/journal

View File

@ -7,6 +7,7 @@ export LC_ALL=C
: "${JOURNAL_DIR:=/var/lib/ceph/journal}" : "${JOURNAL_DIR:=/var/lib/ceph/journal}"
: "${OSD_BOOTSTRAP_KEYRING:=/var/lib/ceph/bootstrap-osd/${CLUSTER}.keyring}" : "${OSD_BOOTSTRAP_KEYRING:=/var/lib/ceph/bootstrap-osd/${CLUSTER}.keyring}"
# Load the "osd_pg_interval_fix" key of /etc/ceph/storage.json into
# OSD_PG_INTERVAL_FIX. Python prints the value as a JSON literal (json.dumps)
# and eval lets the shell parse that literal, so a JSON string ends up in the
# variable without its surrounding double quotes.
# NOTE(review): eval runs the printed text as shell code; this presumably
# relies on storage.json being trusted, chart-generated content - confirm.
eval OSD_PG_INTERVAL_FIX=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["osd_pg_interval_fix"]))')
eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))') eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))') eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))') eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))')
@ -117,6 +118,13 @@ fi
mkdir -p /etc/forego/"${CLUSTER}" mkdir -p /etc/forego/"${CLUSTER}"
echo "" > /etc/forego/"${CLUSTER}"/Procfile echo "" > /etc/forego/"${CLUSTER}"/Procfile
# NOTE(supamatt): https://tracker.ceph.com/issues/21142 is impacting us due
# to the older Ceph version 12.2.3 that we are running.
# When OSD_PG_INTERVAL_FIX is "true", run rm-past-intervals for every entry
# under ${OSD_PATH}/current whose name contains "head".
# NOTE(review): in the visible part of this script OSD_PATH is only assigned
# inside the per-OSD loop that follows this block; confirm it already holds
# the intended OSD directory at this point, otherwise this is a no-op.
if [ "x${OSD_PG_INTERVAL_FIX}" == "xtrue" ]; then
  # Use a quoted glob instead of parsing `ls`, so an OSD path containing
  # whitespace or glob characters is not word-split or re-expanded.
  for PG_DIR in "${OSD_PATH}"/current/*head*; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "${PG_DIR}" ] || [ -L "${PG_DIR}" ] || continue
    # The PG id is the entry name up to its first "_"
    # (e.g. "1.0_head" -> "1.0").
    PG="${PG_DIR##*/}"
    PG="${PG%%_*}"
    # Names starting with "_" yield an empty id; nothing to reset for them.
    [ -n "${PG}" ] || continue
    ceph-objectstore-tool --data-path "${OSD_PATH}" --op rm-past-intervals --pgid "${PG}"
  done
fi
for OSD_ID in $(ls /var/lib/ceph/osd | sed 's/.*-//'); do for OSD_ID in $(ls /var/lib/ceph/osd | sed 's/.*-//'); do
OSD_PATH="$OSD_PATH_BASE-$OSD_ID/" OSD_PATH="$OSD_PATH_BASE-$OSD_ID/"
OSD_KEYRING="${OSD_PATH%/}/keyring" OSD_KEYRING="${OSD_PATH%/}/keyring"

View File

@ -132,6 +132,10 @@ conf:
failure_domain_by_hostname: "false" failure_domain_by_hostname: "false"
failure_domain_name: "false" failure_domain_name: "false"
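# NOTE(supamatt): When set to "true", the OSD scripts run
# `ceph-objectstore-tool --op rm-past-intervals` for every PG found on the OSD,
# resetting the PG's past interval metadata. This works around an open bug
# within Ceph Luminous releases: https://tracker.ceph.com/issues/21142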
osd_pg_interval_fix: "false"
# NOTE(portdirect): for homogeneous clusters the `osd` key can be used to # NOTE(portdirect): for homogeneous clusters the `osd` key can be used to
# define OSD pods that will be deployed across the cluster. # define OSD pods that will be deployed across the cluster.
# when specifying whole disk (/dev/sdf) for journals, ceph-osd chart will create # when specifying whole disk (/dev/sdf) for journals, ceph-osd chart will create
@ -164,6 +168,7 @@ conf:
# - name: host1.fqdn # - name: host1.fqdn
# conf: # conf:
# storage: # storage:
# osd_pg_interval_fix: "true"
# failure_domain_name: "rack1" # failure_domain_name: "rack1"
# osd: # osd:
# - data: # - data: