Document how to recover from a Ceph namespace deletion
Change-Id: Ib1b03cd046fbdad6f18478cfa9c9f0bf70ec9430
@@ -8,3 +8,4 @@ Ceph Resiliency
   README
   failure-domain
   validate-object-replication
   namespace-deletion

doc/source/testing/ceph-resiliency/namespace-deletion.rst (new file, 222 lines)
@@ -0,0 +1,222 @@

===============================
3. Namespace deletion recovery
===============================

This document captures the steps to bring Ceph back up after deleting its associated namespace.

3.1 Setup
==========

.. note::
  Follow the OSH single node or multinode guide to bring up an OSH environment.

3.2 Setup the OSH environment and check Ceph cluster health
=============================================================

.. note::
  Ensure a healthy Ceph cluster is running.

.. code-block:: console

  kubectl exec -n ceph ceph-mon-dtw6m -- ceph -s
    cluster:
      id:     fbaf9ce8-5408-4fce-9bfe-bf7fb938474c
      health: HEALTH_OK

    services:
      mon: 5 daemons, quorum osh-1,osh-2,osh-5,osh-4,osh-3
      mgr: osh-3(active), standbys: osh-4
      mds: cephfs-1/1/1 up {0=mds-ceph-mds-77dc68f476-jb5th=up:active}, 1 up:standby
      osd: 15 osds: 15 up, 15 in

    data:
      pools:   18 pools, 182 pgs
      objects: 21 objects, 2246 bytes
      usage:   3025 MB used, 1496 GB / 1499 GB avail
      pgs:     182 active+clean

- The Ceph cluster is in a HEALTH_OK state with 5 MONs and 15 OSDs.
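
For a scripted check, the health status can be compared directly before continuing (a minimal sketch; the MON pod is looked up with the same label selectors used later in this guide):

.. code-block:: console

  MON_POD=$(kubectl get pods --namespace=ceph \
    --selector="application=ceph" --selector="component=mon" \
    --no-headers | awk '{ print $1; exit }')

  # proceed only once the cluster reports HEALTH_OK
  [ "$(kubectl exec -n ceph ${MON_POD} -- ceph health)" = "HEALTH_OK" ] \
    && echo "cluster healthy" || echo "cluster not healthy yet"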

3.3 Delete Ceph namespace
==========================

.. note::
  Removing the namespace will delete all pods and secrets associated with Ceph.
  !! DO NOT PROCEED WITH DELETING THE CEPH NAMESPACE ON A PRODUCTION ENVIRONMENT !!

Record the cluster ID before deleting the namespace:

.. code-block:: console

  CEPH_NAMESPACE="ceph"
  MON_POD=$(kubectl get pods --namespace=${CEPH_NAMESPACE} \
    --selector="application=ceph" --selector="component=mon" \
    --no-headers | awk '{ print $1; exit }')

  kubectl exec --namespace=${CEPH_NAMESPACE} ${MON_POD} -- ceph status \
    | awk '/id:/{print $2}' | tee /tmp/ceph-fs-uuid.txt

.. code-block:: console

  kubectl delete namespace ${CEPH_NAMESPACE}

.. code-block:: console

  kubectl get pods --namespace ${CEPH_NAMESPACE} -o wide
  No resources found.

  kubectl get secrets --namespace ${CEPH_NAMESPACE}
  No resources found.

- The Ceph namespace has been deleted and all of its associated resources are no longer found.
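
To confirm the namespace itself is gone (it may sit in a Terminating state briefly before disappearing), a failed lookup is expected:

.. code-block:: console

  kubectl get namespace ${CEPH_NAMESPACE}
  Error from server (NotFound): namespaces "ceph" not found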

3.4 Reinstall Ceph charts
==========================

.. note::
  These instructions are specific to a multinode environment.
  For AIO environments, follow the development guide for reinstalling Ceph.

.. code-block:: console

  helm delete --purge ceph-openstack-config

  for chart in $(helm list --namespace ${CEPH_NAMESPACE} | awk '/ceph-/{print $1}'); do
    helm delete ${chart} --purge;
  done

.. note::
  It is normal not to see all pods come back online during the reinstall.
  Only the ceph-mon helm chart is required.

.. code-block:: console

  cd /opt/openstack-helm-infra/
  ./tools/deployment/multinode/030-ceph.sh

3.5 Disable CephX authentication
=================================

.. note::
  Wait until the MON pods are running before proceeding.
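
One way to block until the MON pods report Ready (a sketch using kubectl wait; adjust the timeout as needed):

.. code-block:: console

  kubectl wait --namespace ${CEPH_NAMESPACE} --for=condition=Ready pod \
    -l application=ceph,component=mon --timeout=600s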

.. code-block:: console

  mkdir -p /tmp/ceph/ceph-templates /tmp/ceph/extracted-keys

  kubectl get -n ${CEPH_NAMESPACE} configmaps ceph-mon-etc -o=jsonpath='{.data.ceph\.conf}' > /tmp/ceph/ceph-mon.conf
  sed '/\[global\]/a auth_client_required = none' /tmp/ceph/ceph-mon.conf | \
    sed '/\[global\]/a auth_service_required = none' | \
    sed '/\[global\]/a auth_cluster_required = none' > /tmp/ceph/ceph-mon-noauth.conf

  kubectl --namespace ${CEPH_NAMESPACE} delete configmap ceph-mon-etc
  kubectl --namespace ${CEPH_NAMESPACE} create configmap ceph-mon-etc --from-file=ceph.conf=/tmp/ceph/ceph-mon-noauth.conf

  kubectl delete pod --namespace ${CEPH_NAMESPACE} -l application=ceph,component=mon
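
As a quick sanity check, the replacement configmap should now contain the 'required = none' overrides appended above:

.. code-block:: console

  kubectl get -n ${CEPH_NAMESPACE} configmaps ceph-mon-etc -o=jsonpath='{.data.ceph\.conf}' | grep 'required = none'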

.. note::
  Wait until the MON pods are running again before proceeding.

.. code-block:: console

  MON_POD=$(kubectl get pods --namespace=${CEPH_NAMESPACE} \
    --selector="application=ceph" --selector="component=mon" \
    --no-headers | awk '{ print $1; exit }')

  kubectl exec --namespace=${CEPH_NAMESPACE} ${MON_POD} -- ceph status

- The Ceph cluster will not be healthy at this point; expect a HEALTH_WARN or HEALTH_ERR state.
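
To see exactly which health checks are failing, a detailed breakdown is available:

.. code-block:: console

  kubectl exec --namespace=${CEPH_NAMESPACE} ${MON_POD} -- ceph health detail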

3.6 Replace key secrets with ones extracted from a Ceph MON
============================================================

.. code-block:: console

  tee /tmp/ceph/ceph-templates/mon <<EOF
  [mon.]
    key = $(kubectl --namespace ${CEPH_NAMESPACE} exec ${MON_POD} -- bash -c "ceph-authtool -l \"/var/lib/ceph/mon/ceph-\$(hostname)/keyring\"" | awk '/key =/ {print $NF}')
    caps mon = "allow *"
  EOF

  for KEY in mds osd rgw; do
    tee /tmp/ceph/ceph-templates/${KEY} <<EOF
  [client.bootstrap-${KEY}]
    key = $(kubectl --namespace ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph auth get-key client.bootstrap-${KEY})
    caps mon = "allow profile bootstrap-${KEY}"
  EOF
  done

  tee /tmp/ceph/ceph-templates/admin <<EOF
  [client.admin]
    key = $(kubectl --namespace ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph auth get-key client.admin)
    auid = 0
    caps mds = "allow"
    caps mon = "allow *"
    caps osd = "allow *"
    caps mgr = "allow *"
  EOF

Each line of the mapping file below lists a Ceph component, the Kubernetes secret that holds its key, the data key within that secret, and the corresponding Ceph keyring section:

.. code-block:: console

  tee /tmp/ceph/ceph-key-relationships <<EOF
  mon ceph-mon-keyring ceph.mon.keyring mon.
  mds ceph-bootstrap-mds-keyring ceph.keyring client.bootstrap-mds
  osd ceph-bootstrap-osd-keyring ceph.keyring client.bootstrap-osd
  rgw ceph-bootstrap-rgw-keyring ceph.keyring client.bootstrap-rgw
  admin ceph-client-admin-keyring ceph.client.admin.keyring client.admin
  EOF

.. code-block:: console

  while read CEPH_KEY_RELATIONS; do
    KEY_RELATIONS=($(echo ${CEPH_KEY_RELATIONS}))
    COMPONENT=${KEY_RELATIONS[0]}
    KUBE_SECRET_NAME=${KEY_RELATIONS[1]}
    KUBE_SECRET_DATA_KEY=${KEY_RELATIONS[2]}
    KEYRING_NAME=${KEY_RELATIONS[3]}
    DATA_PATCH=$(cat /tmp/ceph/ceph-templates/${COMPONENT} | envsubst | base64 -w0)
    kubectl --namespace ${CEPH_NAMESPACE} patch secret ${KUBE_SECRET_NAME} -p "{\"data\":{\"${KUBE_SECRET_DATA_KEY}\": \"${DATA_PATCH}\"}}"
  done < /tmp/ceph/ceph-key-relationships
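
To spot-check a patched secret, decode it and compare it with the corresponding template (shown here for the admin keyring; the dots in the data key are escaped in the jsonpath expression):

.. code-block:: console

  kubectl --namespace ${CEPH_NAMESPACE} get secret ceph-client-admin-keyring \
    -o jsonpath='{.data.ceph\.client\.admin\.keyring}' | base64 -d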

3.7 Re-enable CephX authentication
===================================

.. code-block:: console

  kubectl --namespace ${CEPH_NAMESPACE} delete configmap ceph-mon-etc
  kubectl --namespace ${CEPH_NAMESPACE} create configmap ceph-mon-etc --from-file=ceph.conf=/tmp/ceph/ceph-mon.conf
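
This restores the original ceph.conf captured in section 3.5. As a quick check that the 'required = none' overrides are no longer present:

.. code-block:: console

  kubectl get -n ${CEPH_NAMESPACE} configmaps ceph-mon-etc -o=jsonpath='{.data.ceph\.conf}' | grep 'required = none' || echo "no auth overrides present"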

3.8 Reinstall Ceph charts
==========================

.. note::
  These instructions are specific to a multinode environment.
  For AIO environments, follow the development guide for reinstalling Ceph.

.. code-block:: console

  for chart in $(helm list --namespace ${CEPH_NAMESPACE} | awk '/ceph-/{print $1}'); do
    helm delete ${chart} --purge;
  done

.. code-block:: console

  cd /opt/openstack-helm-infra/
  ./tools/deployment/multinode/030-ceph.sh
  ./tools/deployment/multinode/040-ceph-ns-activate.sh

.. code-block:: console

  MON_POD=$(kubectl get pods --namespace=${CEPH_NAMESPACE} \
    --selector="application=ceph" --selector="component=mon" \
    --no-headers | awk '{ print $1; exit }')

  kubectl exec --namespace=${CEPH_NAMESPACE} ${MON_POD} -- ceph status
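
If the cluster ID was captured in section 3.3, it can be compared against the reinstalled cluster as a final sanity check (this uses the file saved earlier):

.. code-block:: console

  kubectl exec --namespace=${CEPH_NAMESPACE} ${MON_POD} -- ceph status \
    | awk '/id:/{print $2}' | diff - /tmp/ceph-fs-uuid.txt \
    && echo "cluster ID unchanged"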

.. note::
  AIO environments need the following command to clear MDS standby health warnings.

.. code-block:: console

  kubectl exec --namespace=${CEPH_NAMESPACE} ${MON_POD} -- ceph fs set cephfs standby_count_wanted 0

- The Ceph pods are now running and the cluster is healthy (HEALTH_OK).