From 75278a856c94b2f66db8dc7b6800916f930d25dc Mon Sep 17 00:00:00 2001
From: kgoncalv
Date: Thu, 11 Sep 2025 10:00:18 -0300
Subject: [PATCH] Allow Rook Ceph to auto-estimate PGs per pool

This change allows Rook Ceph to auto-estimate the number of placement
groups (PGs) per pool. Placement groups are subsets of each logical
Ceph pool; they place objects, as a group, into OSDs. Ceph manages
data internally at placement-group granularity, which scales better
than managing individual RADOS objects.

It fixes an issue on simplex environments with a single OSD, where a
warning alarm is raised because there are too many PGs per OSD: the
limit is 250, and once the application is applied more PGs than this
recommended limit are created.

Rook Ceph uses the Reef release, which does not require specifying
pg_num at pool creation [1], while the Nautilus release requires
pg_num to be specified when a pool is created [2]. Given the different
approaches of these releases, backward compatibility is also
implemented so that pool creation works on both: the self-estimation
function is preserved for host Ceph.

Comparison:

simplex rook ceph without the patch:
POOL                   PG_NUM
.mgr                   32
kube-cephfs-metadata   16
kube-rbd               32
kube-cephfs-data       32
images                 64
cinder.backups         64
cinder-volumes         64

simplex rook ceph with the patch:
POOL                   PG_NUM
kube-rbd               32
.mgr                   1
kube-cephfs-metadata   16
kube-cephfs-data       32
images                 32
cinder.backups         32
cinder-volumes         32

simplex host ceph before and after the patch:
dumped pgs_brief kube-rbd (1): 64 PGs
dumped pgs_brief kube-cephfs-data (2): 64 PGs
dumped pgs_brief kube-cephfs-metadata (3): 64 PGs
dumped pgs_brief images (4): 64 PGs
dumped pgs_brief cinder.backups (5): 64 PGs
dumped pgs_brief cinder-volumes (6): 64 PGs

standard rook ceph without the patch:
POOL                   PG_NUM
kube-rbd               32
.mgr                   1
kube-cephfs-metadata   16
kube-cephfs-data       32
images                 32
cinder.backups         32
cinder-volumes         32

standard rook ceph with the patch:
POOL                   PG_NUM
kube-rbd               32
.mgr                   1
kube-cephfs-metadata   16
kube-cephfs-data       32
images                 32
cinder.backups         32
cinder-volumes         32

Test plan:

simplex:
  rook-ceph:
    PASS - build openstack
    PASS - apply openstack
    PASS - create VMs
    PASS - ping between VMs
    PASS - volume creation/backup creation
    PASS - validate alarm for total PGs
  host-ceph:
    PASS - build openstack
    PASS - apply openstack
    PASS - create VMs
    PASS - ping between VMs
    PASS - volume creation/backup creation
    PASS - validate alarm for total PGs

standard:
  rook-ceph:
    PASS - build openstack
    PASS - apply openstack
    PASS - create VMs
    PASS - ping between VMs
    PASS - volume creation/backup creation
    PASS - validate alarm for total PGs
  host-ceph:
    PASS - build openstack
    PASS - apply openstack
    PASS - create VMs
    PASS - ping between VMs
    PASS - volume creation/backup creation
    PASS - validate alarm for total PGs

miscellaneous:
  PASS - change pool pg_num through user-overrides

References:
[1] - https://docs.ceph.com/en/reef/rados/operations/placement-groups/#preselecting-pg-num
[2] - https://docs.ceph.com/en/nautilus/rados/operations/placement-groups/#a-preselection-of-pg-num

Closes-Bug: 2122620
Change-Id: I018f7302328c3789864d7f7875fe7d2b4b31f7ee
Signed-off-by: kgoncalv
---
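Reviewer note, not part of the commit itself: the short Python sketch below only
illustrates the chunk-size convention this change relies on; it is not code from
the plugins. The helper name select_chunk_size and its parameters are
hypothetical, while _estimate_ceph_pool_pg_num(), CEPH_POOL_VOLUMES_CHUNK_SIZE
and the new ROOK_CEPH_POOL_*_CHUNK_SIZE constants appear in the diff below. A
chunk size of 0 tells the storage-init templates to run
'ceph osd pool create <pool>' with no pg_num argument, letting Ceph size the
pool, while host Ceph keeps the estimated value.

# Illustrative sketch only (hypothetical helper, not the plugin code).
ROOK_CEPH_AUTO_ESTIMATE = 0  # same convention as ROOK_CEPH_POOL_*_CHUNK_SIZE


def select_chunk_size(is_rook_ceph, estimated_pg_num, pool_cap):
    """Return the chunk_size override handed to the storage-init templates."""
    if is_rook_ceph:
        # 0 means: call 'ceph osd pool create <pool>' without pg_num, so the
        # Reef-based Rook Ceph auto-estimates the PG count per pool.
        return ROOK_CEPH_AUTO_ESTIMATE
    # Host Ceph (Nautilus) still needs an explicit pg_num at pool creation,
    # capped by a per-pool constant such as CEPH_POOL_VOLUMES_CHUNK_SIZE.
    return min(estimated_pg_num, pool_cap)


print(select_chunk_size(True, 64, 256))   # -> 0 (auto-estimate)
print(select_chunk_size(False, 64, 256))  # -> 64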
 ...0028-Allow-rook-ceph-auto-estimation.patch |  94 +++++++++++++++++++
 .../debian/deb_folder/patches/series          |   3 +-
 .../k8sapp_openstack/common/constants.py      |   6 ++
 .../k8sapp_openstack/helm/cinder.py           |   9 +-
 .../k8sapp_openstack/helm/glance.py           |   5 +-
 .../k8sapp_openstack/helm/nova.py             |   5 +-
 6 files changed, 112 insertions(+), 10 deletions(-)
 create mode 100644 openstack-helm/debian/deb_folder/patches/0028-Allow-rook-ceph-auto-estimation.patch

diff --git a/openstack-helm/debian/deb_folder/patches/0028-Allow-rook-ceph-auto-estimation.patch b/openstack-helm/debian/deb_folder/patches/0028-Allow-rook-ceph-auto-estimation.patch
new file mode 100644
index 00000000..3c9450fb
--- /dev/null
+++ b/openstack-helm/debian/deb_folder/patches/0028-Allow-rook-ceph-auto-estimation.patch
@@ -0,0 +1,94 @@
+From 534797f4235187f5f3d0a4a558ba487379855464 Mon Sep 17 00:00:00 2001
+From: kgoncalv
+Date: Fri, 12 Sep 2025 11:12:11 -0300
+Subject: [PATCH] Allow rook ceph auto estimation
+
+This patch allows Rook Ceph to use its built-in feature to auto-estimate
+the pg_num of each pool, avoiding a health warning on a simplex system
+with a single OSD, where too many PGs were created per OSD.
+This issue only occurs on a simplex system with a single OSD.
+To enable the Rook Ceph auto-estimation feature, the argument that sets
+the minimum number of PGs must be omitted, so that Ceph can estimate it
+automatically.
+This patch also keeps backward compatibility with Ceph versions that do
+not support this auto-estimation.
+
+Signed-off-by: kgoncalv
+---
+ cinder/templates/bin/_backup-storage-init.sh.tpl | 6 +++++-
+ cinder/templates/bin/_storage-init.sh.tpl        | 6 +++++-
+ glance/templates/bin/_storage-init.sh.tpl        | 6 +++++-
+ nova/templates/bin/_storage-init.sh.tpl          | 6 +++++-
+ 4 files changed, 20 insertions(+), 4 deletions(-)
+
+diff --git a/cinder/templates/bin/_backup-storage-init.sh.tpl b/cinder/templates/bin/_backup-storage-init.sh.tpl
+index 1601172a1..a9752e854 100644
+--- a/cinder/templates/bin/_backup-storage-init.sh.tpl
++++ b/cinder/templates/bin/_backup-storage-init.sh.tpl
+@@ -31,7 +31,11 @@ if [[ $STORAGE_BACKEND =~ 'cinder.backup.drivers.swift' ]] || \
+ elif [[ $STORAGE_BACKEND =~ 'cinder.backup.drivers.ceph' ]]; then
+   ceph -s
+   function ensure_pool () {
+-    ceph osd pool stats $1 || ceph osd pool create $1 $2
++    if [ "$2" -eq 0 ]; then
++      ceph osd pool stats $1 || ceph osd pool create $1
++    else
++      ceph osd pool stats $1 || ceph osd pool create $1 $2
++    fi
+
+     # As of the Luminous release, it is mandatory to enable applications on pools.
+     # To find out if the release is greater than (or equal to) Luminous, just check
+diff --git a/cinder/templates/bin/_storage-init.sh.tpl b/cinder/templates/bin/_storage-init.sh.tpl
+index 4d1c28e4e..53b0b071c 100644
+--- a/cinder/templates/bin/_storage-init.sh.tpl
++++ b/cinder/templates/bin/_storage-init.sh.tpl
+@@ -28,7 +28,11 @@ set -ex
+ if [ "x$STORAGE_BACKEND" == "xcinder.volume.drivers.rbd.RBDDriver" ]; then
+   ceph -s
+   function ensure_pool () {
+-    ceph osd pool stats $1 || ceph osd pool create $1 $2
++    if [ "$2" -eq 0 ]; then
++      ceph osd pool stats $1 || ceph osd pool create $1
++    else
++      ceph osd pool stats $1 || ceph osd pool create $1 $2
++    fi
+
+     # As of the Luminous release, it is mandatory to enable applications on pools.
+     # To find out if the release is greater than (or equal to) Luminous, just check
+diff --git a/glance/templates/bin/_storage-init.sh.tpl b/glance/templates/bin/_storage-init.sh.tpl
+index 241c62e96..f780ff9e8 100644
+--- a/glance/templates/bin/_storage-init.sh.tpl
++++ b/glance/templates/bin/_storage-init.sh.tpl
+@@ -44,7 +44,11 @@ elif [ "x$STORAGE_BACKEND" == "xswift" ]; then
+ elif [ "x$STORAGE_BACKEND" == "xrbd" ]; then
+   ceph -s
+   function ensure_pool () {
+-    ceph osd pool stats "$1" || ceph osd pool create "$1" "$2"
++    if [ "$2" -eq 0 ]; then
++      ceph osd pool stats "$1" || ceph osd pool create "$1"
++    else
++      ceph osd pool stats "$1" || ceph osd pool create "$1" "$2"
++    fi
+
+     # As of the Luminous release, it is mandatory to enable applications on pools.
+     # To find out if the release is greater than (or equal to) Luminous, just check
+diff --git a/nova/templates/bin/_storage-init.sh.tpl b/nova/templates/bin/_storage-init.sh.tpl
+index 70c71a9f4..ed56e27b9 100644
+--- a/nova/templates/bin/_storage-init.sh.tpl
++++ b/nova/templates/bin/_storage-init.sh.tpl
+@@ -28,7 +28,11 @@ set -ex
+ if [ "x$STORAGE_BACKEND" == "xrbd" ]; then
+   ceph -s
+   function ensure_pool () {
+-    ceph osd pool stats $1 || ceph osd pool create $1 $2
++    if [ "$2" -eq 0 ]; then
++      ceph osd pool stats $1 || ceph osd pool create $1
++    else
++      ceph osd pool stats $1 || ceph osd pool create $1 $2
++    fi
+     if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -ge 12 ]]; then
+       ceph osd pool application enable $1 $3
+     fi
+--
+2.34.1
+
diff --git a/openstack-helm/debian/deb_folder/patches/series b/openstack-helm/debian/deb_folder/patches/series
index 5b7ec20b..71478525 100644
--- a/openstack-helm/debian/deb_folder/patches/series
+++ b/openstack-helm/debian/deb_folder/patches/series
@@ -24,4 +24,5 @@
 0024-Enable-ceph-pool-creation-for-AIO-systems.patch
 0025-Add-IPv6-compatibility-to-neutron-openvswitch-agent.patch
 0026-Copy-host-UUID-into-Nova-s-config-dir.patch
-0027-Add-retry-to-hostname-reading-by-neutron-agents.patch
\ No newline at end of file
+0027-Add-retry-to-hostname-reading-by-neutron-agents.patch
+0028-Allow-rook-ceph-auto-estimation.patch
diff --git a/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/common/constants.py b/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/common/constants.py
index e19ec125..eb63226b 100644
--- a/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/common/constants.py
+++ b/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/common/constants.py
@@ -129,6 +129,12 @@
 CEPH_POOL_BACKUP_NAME = 'backup'
 CEPH_POOL_BACKUP_APP_NAME = 'cinder-backup'
 CEPH_POOL_BACKUP_CHUNK_SIZE = 256
 
+# Rook ceph constants
+ROOK_CEPH_POOL_CINDER_VOLUME_CHUNK_SIZE = 0
+ROOK_CEPH_POOL_CINDER_BACKUP_CHUNK_SIZE = 0
+ROOK_CEPH_POOL_GLANCE_CHUNK_SIZE = 0
+ROOK_CEPH_POOL_NOVA_RBD_CHUNK_SIZE = 0
+
 # Cinder version used as the default value when getting service name and type
 CINDER_CURRENT_VERSION = 'v3'
diff --git a/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/cinder.py b/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/cinder.py
index c52658d3..ab3b4232 100644
--- a/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/cinder.py
+++ b/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/cinder.py
@@ -432,21 +432,16 @@ class CinderHelm(openstack.OpenstackBaseHelm):
         replication, _ = storage_backend_conf\
             .StorageBackendConfig\
             .get_ceph_pool_replication(self.dbapi, ceph_backend=rook_backend)
-
-        chunk_size = self._estimate_ceph_pool_pg_num(self.dbapi.istor_get_all())
-
         pools = {
             f'{app_constants.CEPH_POOL_VOLUMES_NAME}': {
                 'app_name': app_constants.CEPH_POOL_VOLUMES_APP_NAME,
-                'chunk_size': min(chunk_size,
-                                  app_constants.CEPH_POOL_VOLUMES_CHUNK_SIZE),
+                'chunk_size': app_constants.ROOK_CEPH_POOL_CINDER_VOLUME_CHUNK_SIZE,
                 'crush_rule': app_constants.CEPH_ROOK_POLL_CRUSH_RULE,
                 'replication': replication,
             },
             f'{app_constants.CEPH_POOL_BACKUP_NAME}': {
                 'app_name': app_constants.CEPH_POOL_BACKUP_APP_NAME,
-                'chunk_size': min(chunk_size,
-                                  app_constants.CEPH_POOL_BACKUP_CHUNK_SIZE),
+                'chunk_size': app_constants.ROOK_CEPH_POOL_CINDER_BACKUP_CHUNK_SIZE,
                 'crush_rule': app_constants.CEPH_ROOK_POLL_CRUSH_RULE,
                 'replication': replication,
             },
diff --git a/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/glance.py b/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/glance.py
index b48595a4..2e2471d4 100644
--- a/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/glance.py
+++ b/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/glance.py
@@ -153,7 +153,10 @@ class GlanceHelm(openstack.OpenstackBaseHelm):
         else:
             rule_name = app_constants.CEPH_ROOK_POLL_CRUSH_RULE
 
-        chunk_size = self._estimate_ceph_pool_pg_num(self.dbapi.istor_get_all())
+        if self._rook_ceph:
+            chunk_size = app_constants.ROOK_CEPH_POOL_GLANCE_CHUNK_SIZE
+        else:
+            chunk_size = self._estimate_ceph_pool_pg_num(self.dbapi.istor_get_all())
 
         conf = {
             'glance': {
diff --git a/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/nova.py b/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/nova.py
index e8b28681..25093e69 100644
--- a/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/nova.py
+++ b/python3-k8sapp-openstack/k8sapp_openstack/k8sapp_openstack/helm/nova.py
@@ -740,7 +740,10 @@ class NovaHelm(openstack.OpenstackBaseHelm):
         rook_ceph_rule_name = app_constants.CEPH_ROOK_POLL_CRUSH_RULE
         rule_name = rook_ceph_rule_name if self._rook_ceph else ceph_rule_name
 
-        chunk_size = self._estimate_ceph_pool_pg_num(self.dbapi.istor_get_all())
+        if self._rook_ceph:
+            chunk_size = app_constants.ROOK_CEPH_POOL_NOVA_RBD_CHUNK_SIZE
+        else:
+            chunk_size = self._estimate_ceph_pool_pg_num(self.dbapi.istor_get_all())
 
         # Form the dictionary with the info for the ephemeral pool.
         # If needed, multiple pools can be specified.
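
Reviewer note, appended after the diff and not part of the patch: a hypothetical
snippet to reproduce the POOL / PG_NUM comparison from the commit message after
the application is applied. It only shells out to the standard ceph CLI
('ceph osd pool ls' and 'ceph osd pool get <pool> pg_num'); the helper name
pool_pg_nums and the assumption that the JSON output exposes a 'pg_num' key are
not taken from the patch.

# Hypothetical verification helper, assuming access to the ceph CLI.
import json
import subprocess


def pool_pg_nums():
    """Return {pool_name: pg_num} for every pool in the cluster."""
    pools = subprocess.check_output(
        ["ceph", "osd", "pool", "ls"], text=True).splitlines()
    result = {}
    for pool in pools:
        out = subprocess.check_output(
            ["ceph", "osd", "pool", "get", pool, "pg_num", "--format", "json"],
            text=True)
        # Assumed JSON shape: {"pool": "...", "pg_num": 32, ...}
        result[pool] = json.loads(out)["pg_num"]
    return result


if __name__ == "__main__":
    for name, pg_num in sorted(pool_pg_nums().items()):
        print(f"{name:25} {pg_num}")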