Add job for doing etcd defragmentation (#228)

* Add a Helm chart for a cronjob for doing etcd defrag

* Add HelmRelease to deploy etcd defrag cronjob onto clusters

* Use charts from the repository in tests

* Clarify comment in values

* Reinstate pull_request_target
This commit is contained in:
Matt Pryor 2024-01-25 13:42:53 +00:00 committed by GitHub
parent efb575284b
commit a2d70b146a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 327 additions and 16 deletions

View File

@ -1,8 +1,8 @@
name: Upgrade and test cluster
description: >-
Run a Helm upgrade using the specified values, wait for the cluster to
become ready and run Sonobuoy against it
Run a Helm upgrade using the specified chart version and values, wait for
the cluster to become ready and run Sonobuoy against it
inputs:
name:
@ -16,10 +16,17 @@ inputs:
description: The name of the cloud within the OpenStack clouds file
required: true
default: openstack
chart-directory:
description: The directory containing the chart
chart-repo:
description: The repository to fetch the charts from
required: true
default: https://stackhpc.github.io/capi-helm-charts
chart-name:
description: The name of the chart to use
required: true
default: openstack-cluster
chart-version:
description: The version of the charts to use
required: true
default: charts/openstack-cluster
values-path:
description: The path to a file containing Helm values
required: true
@ -48,14 +55,12 @@ inputs:
runs:
using: "composite"
steps:
- name: Update dependencies for chart
shell: bash
run: helm dependency update ${{ inputs.chart-directory }}
- name: Install or upgrade cluster from directory
shell: bash
run: |-
helm upgrade ${{ inputs.name }} ${{ inputs.chart-directory }} \
helm upgrade ${{ inputs.name }} ${{ inputs.chart-name }} \
--repo ${{ inputs.chart-repo }} \
--version ${{ inputs.chart-version }} \
--install \
--values ${{ inputs.os-client-config-file }} \
--values ${{ inputs.values-path }} \

View File

@ -28,13 +28,22 @@ jobs:
with:
ref: ${{ github.sha }}
# Publish the charts for the commit under test; the test job depends on this
# job and installs the chart version that it outputs
publish_charts:
  uses: ./.github/workflows/publish-charts.yaml
  needs:
    - lint
  with:
    ref: ${{ github.sha }}
  secrets: inherit
test:
needs: [mirror_container_images, ensure_capi_images]
needs: [mirror_container_images, ensure_capi_images, publish_charts]
uses: ./.github/workflows/test.yaml
secrets: inherit
with:
# Pass the images as JSON
images: ${{ toJSON(needs.ensure_capi_images.outputs) }}
# Pass the chart version to test
chart-version: ${{ needs.publish_charts.outputs.chart-version }}
# We want to test the current sha
ref: ${{ github.sha }}
# Only run the sanity check on main

View File

@ -44,13 +44,22 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
# Publish the charts built from the PR; the test job depends on this job and
# installs the chart version that it outputs
publish_charts:
  needs: [lint]
  uses: ./.github/workflows/publish-charts.yaml
  secrets: inherit
  with:
    # Build the charts from the PR head, like the other jobs in this workflow.
    # For pull_request_target events github.sha is the last commit on the base
    # branch, so using it here would publish and test the wrong code.
    ref: ${{ github.event.pull_request.head.sha }}
test:
needs: [mirror_container_images, ensure_capi_images]
needs: [mirror_container_images, ensure_capi_images, publish_charts]
uses: ./.github/workflows/test.yaml
secrets: inherit
with:
# Pass the images as JSON
images: ${{ toJSON(needs.ensure_capi_images.outputs) }}
# Pass the chart version to test
chart-version: ${{ needs.publish_charts.outputs.chart-version }}
# We want to test the code in the PR
ref: ${{ github.event.pull_request.head.sha }}
# If the PR is in draft, just run a sanity check

View File

@ -1,15 +1,26 @@
name: publish artifacts
on:
push:
workflow_call:
inputs:
ref:
type: string
description: The Git ref under test.
required: true
outputs:
chart-version:
value: ${{ jobs.build_push_charts.outputs.chart-version }}
jobs:
build_push_chart:
build_push_charts:
name: Build and push Helm charts
runs-on: ubuntu-latest
outputs:
chart-version: ${{ steps.semver.outputs.version }}
steps:
- name: Check out the repository
uses: actions/checkout@v3
with:
ref: ${{ inputs.ref }}
# This is important for the semver action to work correctly
# when determining the number of commits since the last tag
fetch-depth: 0

View File

@ -5,6 +5,9 @@ on:
images:
type: string
description: JSON-encoded dictionary of images and versions
chart-version:
type: string
description: The version of the charts to test
ref:
type: string
description: The Git ref under test.
@ -79,6 +82,7 @@ jobs:
uses: ./.github/actions/upgrade-and-test
with:
name: ci-${{ github.run_id }}-${{ github.job }}
chart-version: ${{ inputs.chart-version }}
kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }}
image-id: ${{ fromJson(inputs.images).kube-1-29-image }}
sonobuoy-mode: ${{ inputs.tests-full && 'certified-conformance' || 'quick' }}
@ -146,6 +150,7 @@ jobs:
uses: ./.github/actions/upgrade-and-test
with:
name: ci-${{ github.run_id }}-${{ github.job }}
chart-version: ${{ inputs.chart-version }}
kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }}
image-id: ${{ fromJson(inputs.images).kube-1-29-image }}
@ -210,6 +215,7 @@ jobs:
uses: ./.github/actions/upgrade-and-test
with:
name: ci-${{ github.run_id }}-${{ github.job }}
chart-version: ${{ inputs.chart-version }}
kubernetes-version: ${{ fromJson(inputs.images).kube-1-27-version }}
image-id: ${{ fromJson(inputs.images).kube-1-27-image }}
@ -217,6 +223,7 @@ jobs:
uses: ./.github/actions/upgrade-and-test
with:
name: ci-${{ github.run_id }}-${{ github.job }}
chart-version: ${{ inputs.chart-version }}
kubernetes-version: ${{ fromJson(inputs.images).kube-1-28-version }}
image-id: ${{ fromJson(inputs.images).kube-1-28-image }}
@ -224,6 +231,7 @@ jobs:
uses: ./.github/actions/upgrade-and-test
with:
name: ci-${{ github.run_id }}-${{ github.job }}
chart-version: ${{ inputs.chart-version }}
kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }}
image-id: ${{ fromJson(inputs.images).kube-1-29-image }}
@ -319,8 +327,9 @@ jobs:
- name: Deploy cluster with chart from latest tag
uses: ./current/.github/actions/upgrade-and-test
with:
chart-directory: latest-tag/charts/openstack-cluster
name: ci-${{ github.run_id }}-${{ github.job }}
# Deploy using the tagged version here
chart-version: ${{ steps.latest-tag.outputs.tag-name }}
kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }}
image-id: ${{ fromJson(inputs.images).kube-1-29-image }}
@ -332,8 +341,9 @@ jobs:
- name: Upgrade cluster to current chart
uses: ./current/.github/actions/upgrade-and-test
with:
chart-directory: current/charts/openstack-cluster
name: ci-${{ github.run_id }}-${{ github.job }}
# And upgrade to the version under test
chart-version: ${{ inputs.chart-version }}
kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }}
image-id: ${{ fromJson(inputs.images).kube-1-29-image }}

View File

@ -0,0 +1,35 @@
{{- if .Values.etcdDefrag.enabled }}
---
# Holds the values for the etcd-defrag release; referenced by the HelmRelease
# below through valuesSources
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "cluster-addons.componentName" (list . "etcd-defrag") }}-config
  labels:
    {{- include "cluster-addons.componentLabels" (list . "etcd-defrag") | nindent 4 }}
    addons.stackhpc.com/watch: ""
stringData:
  overrides: |
    {{- toYaml .Values.etcdDefrag.release.values | nindent 4 }}
---
# Installs the etcd-defrag chart onto the cluster
apiVersion: addons.stackhpc.com/v1alpha1
kind: HelmRelease
metadata:
  name: {{ include "cluster-addons.componentName" (list . "etcd-defrag") }}
  labels:
    {{- include "cluster-addons.componentLabels" (list . "etcd-defrag") | nindent 4 }}
  annotations:
    # Tell Argo to ignore the non-controller owner references for this object
    argocd.argoproj.io/sync-options: "ControllerReferencesOnly=true"
spec:
  clusterName: {{ include "cluster-addons.clusterName" . }}
  bootstrap: true
  chart:
    # User-supplied scalars are quoted so that YAML cannot re-type them
    repo: {{ .Values.etcdDefrag.chart.repo | quote }}
    name: {{ .Values.etcdDefrag.chart.name | quote }}
    # Falls back to the version of this chart when no version is given
    version: {{ default .Chart.Version .Values.etcdDefrag.chart.version | quote }}
  targetNamespace: {{ .Values.etcdDefrag.release.namespace | quote }}
  releaseName: etcd-defrag
  valuesSources:
    - secret:
        name: {{ include "cluster-addons.componentName" (list . "etcd-defrag") }}-config
        key: overrides
{{- end }}

View File

@ -114,6 +114,19 @@ openstack:
name: k8s-keystone-auth
version: 0.0.9
# Configuration for the etcd defragmentation cronjob
etcdDefrag:
  # Whether the etcd defragmentation job is deployed
  enabled: true
  chart:
    repo: https://stackhpc.github.io/capi-helm-charts
    name: etcd-defrag
    # When null, the version of this chart is used
    version: null
  release:
    # This must be the namespace in which the etcd pods are deployed
    namespace: kube-system
    values: {}
# Settings for the metrics server
# https://github.com/kubernetes-sigs/metrics-server#helm-chart
metricsServer:

View File

@ -0,0 +1,7 @@
apiVersion: v2
type: application
name: etcd-defrag
description: >-
  Helm chart for deploying a cronjob to do etcd defragmentation
  on a kubeadm cluster.
version: 0.1.0
appVersion: main

View File

@ -0,0 +1,7 @@
# etcd-defrag chart
This chart installs a [CronJob](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/)
that will periodically defragment the etcd keyspace for a cluster that is managed using
[kubeadm](https://kubernetes.io/docs/reference/setup-tools/kubeadm/).
It is installed as part of the [cluster-addons](../cluster-addons) chart.

View File

@ -0,0 +1,58 @@
{{/*
The short name of the chart: nameOverride when it is set, otherwise the chart
name. The result is cut to 63 characters and any trailing "-" is removed.
*/}}
{{- define "etcd-defrag.name" -}}
{{- $name := .Values.nameOverride | default .Chart.Name }}
{{- $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
The fully qualified name used for the chart's resources.
Precedence: fullnameOverride, then the release name on its own when it already
contains the chart name (or nameOverride), then "<release name>-<name>".
The result is cut to 63 characters because some Kubernetes name fields are
limited to this (by the DNS naming spec), and any trailing "-" is removed.
*/}}
{{- define "etcd-defrag.fullname" -}}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- $fullname := printf "%s-%s" .Release.Name $name }}
{{- if .Values.fullnameOverride }}
{{- $fullname = .Values.fullnameOverride }}
{{- else if contains $name .Release.Name }}
{{- $fullname = .Release.Name }}
{{- end }}
{{- $fullname | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
The "<chart name>-<chart version>" string used as the value of the chart label.
Any "+" is replaced with "_", the result is cut to 63 characters and a trailing
"-", "." or "_" is then removed (in that order).
*/}}
{{- define "etcd-defrag.chart" -}}
{{- $label := printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 }}
{{- $label | trimSuffix "-" | trimSuffix "." | trimSuffix "_" }}
{{- end }}
{{/*
The labels applied to every resource in the chart: the chart label, the
selector labels, the app version (only when one is set) and the managing service.
*/}}
{{- define "etcd-defrag.labels" -}}
helm.sh/chart: {{ include "etcd-defrag.chart" . }}
{{ include "etcd-defrag.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
The labels that identify resources belonging to this release of the chart:
the chart's short name and the release name.
*/}}
{{- define "etcd-defrag.selectorLabels" -}}
{{- $name := include "etcd-defrag.name" . -}}
app.kubernetes.io/name: {{ $name }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

View File

@ -0,0 +1,59 @@
# CronJob that defragments etcd by exec-ing etcdctl inside one of the etcd pods
# in the release namespace
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ include "etcd-defrag.fullname" . }}
  labels: {{ include "etcd-defrag.labels" . | nindent 4 }}
spec:
  schedule: {{ .Values.schedule | quote }}
  startingDeadlineSeconds: {{ .Values.startingDeadlineSeconds }}
  # Prevent the next job from stomping on one that hasn't finished yet
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      backoffLimit: {{ .Values.backoffLimit }}
      activeDeadlineSeconds: {{ .Values.activeDeadlineSeconds }}
      template:
        spec:
          restartPolicy: Never
          serviceAccountName: {{ include "etcd-defrag.fullname" . }}
          {{- with .Values.imagePullSecrets }}
          imagePullSecrets: {{ toYaml . | nindent 12 }}
          {{- end }}
          securityContext: {{ toYaml .Values.podSecurityContext | nindent 12 }}
          containers:
            - name: {{ .Chart.Name }}
              {{- /*
                The image tag defaults to "<major>.<minor>" of the target cluster.
                The reported minor version can carry a trailing "+" (e.g. "28+"),
                which is not part of any image tag, so it is stripped first.
              */}}
              {{- $kubeMinor := trimSuffix "+" .Capabilities.KubeVersion.Minor }}
              {{- $defaultTag := printf "%s.%s" .Capabilities.KubeVersion.Major $kubeMinor }}
              image: {{ printf "%s:%s" .Values.image.repository (default $defaultTag .Values.image.tag) }}
              imagePullPolicy: {{ .Values.image.pullPolicy }}
              # We run the defrag by execing into one of the etcd pods
              command:
                - bash
                - -c
                - |
                  set -e
                  # Pick the first etcd pod that is actually running, so that we
                  # do not try to exec into a pod that cannot accept it
                  POD_NAME="$(
                    kubectl get pod \
                      --namespace {{ .Release.Namespace }} \
                      --selector component=etcd \
                      --field-selector status.phase=Running \
                      --output go-template='{{ "{{" }}(index .items 0).metadata.name{{ "}}" }}'
                  )"
                  # --cluster defragments every member, using the etcd certificates
                  # that are already present inside the etcd pod
                  kubectl exec --namespace {{ .Release.Namespace }} "$POD_NAME" -- \
                    etcdctl defrag \
                      --cluster \
                      --cacert /etc/kubernetes/pki/etcd/ca.crt \
                      --cert /etc/kubernetes/pki/etcd/server.crt \
                      --key /etc/kubernetes/pki/etcd/server.key
              securityContext: {{ toYaml .Values.securityContext | nindent 16 }}
              resources: {{ toYaml .Values.resources | nindent 16 }}
          {{- with .Values.nodeSelector }}
          nodeSelector: {{ toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.affinity }}
          affinity: {{ toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.tolerations }}
          tolerations: {{ toYaml . | nindent 12 }}
          {{- end }}

View File

@ -0,0 +1,22 @@
# Permissions used by the defrag job: finding pods and exec-ing into them
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ include "etcd-defrag.fullname" . }}
  labels:
    {{- include "etcd-defrag.labels" . | nindent 4 }}
rules:
  # Listing and reading pods
  - apiGroups: [""]
    resources: [pods]
    verbs: [list, watch, get]
  # Exec-ing into pods
  - apiGroups: [""]
    resources: [pods/exec]
    verbs: [get, create]

View File

@ -0,0 +1,13 @@
# Grants the chart's Role to the chart's ServiceAccount in the release namespace
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ include "etcd-defrag.fullname" . }}
  labels:
    {{- include "etcd-defrag.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ include "etcd-defrag.fullname" . }}
subjects:
  - kind: ServiceAccount
    name: {{ include "etcd-defrag.fullname" . }}
    namespace: {{ .Release.Namespace }}

View File

@ -0,0 +1,5 @@
# The identity that the defrag job's pods run as
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "etcd-defrag.fullname" . }}
  labels:
    {{- include "etcd-defrag.labels" . | nindent 4 }}

View File

@ -0,0 +1,41 @@
# Cron schedule for the defragmentation job (00:00 every day by default)
schedule: "0 0 * * *"
# A job may start up to 12 hours after its scheduled time
# If it has not started by then, it is skipped in favour of the next one
startingDeadlineSeconds: 43200
# Give up on a defrag after three retries or one hour, whichever comes first
backoffLimit: 3
activeDeadlineSeconds: 3600

# The kubectl image to use
image:
  repository: bitnami/kubectl
  pullPolicy: IfNotPresent
  # Defaults to the Kubernetes minor version, e.g. 1.28
  tag: ""
imagePullSecrets: []

# Security context applied to the pod
podSecurityContext:
  runAsNonRoot: true

# Security context applied to the kubectl container
securityContext:
  allowPrivilegeEscalation: false
  capabilities:
    drop:
      - ALL
  readOnlyRootFilesystem: true

# Resource requests and limits for the kubectl container
resources: {}

# Scheduling parameters for the kubectl pod
nodeSelector: {}
# Let the pods be scheduled onto control plane nodes if they need to be
tolerations:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
  - effect: NoSchedule
    key: node-role.kubernetes.io/control-plane
affinity: {}

View File

@ -0,0 +1,7 @@
# Tags of the kubectl image listed per registry; the tags are quoted so that
# they stay strings rather than being read as floats
docker.io:
  images:
    bitnami/kubectl: ["1.26", "1.27", "1.28", "1.29"]