From a2d70b146ae9c69df131e5cac2894d2970ff8ea1 Mon Sep 17 00:00:00 2001 From: Matt Pryor Date: Thu, 25 Jan 2024 13:42:53 +0000 Subject: [PATCH] Add job for doing etcd defragmentation (#228) * Add a Helm chart for a cronjob for doing etcd defrag * Add HelmRelease to deploy etcd defrag cronjob onto clusters * Use charts from the repository in tests * Clarify comment in values * Reinstate pull_request_target --- .github/actions/upgrade-and-test/action.yml | 25 ++++---- .github/workflows/main.yaml | 11 +++- .github/workflows/pr.yaml | 11 +++- ...ish-artifacts.yaml => publish-charts.yaml} | 15 ++++- .github/workflows/test.yaml | 14 ++++- .../cluster-addons/templates/etcd-defrag.yaml | 35 +++++++++++ charts/cluster-addons/values.yaml | 13 ++++ charts/etcd-defrag/Chart.yaml | 7 +++ charts/etcd-defrag/README.md | 7 +++ charts/etcd-defrag/templates/_helpers.tpl | 58 ++++++++++++++++++ charts/etcd-defrag/templates/cronjob.yaml | 59 +++++++++++++++++++ charts/etcd-defrag/templates/role.yaml | 22 +++++++ charts/etcd-defrag/templates/rolebinding.yaml | 13 ++++ .../etcd-defrag/templates/serviceaccount.yaml | 5 ++ charts/etcd-defrag/values.yaml | 41 +++++++++++++ skopeo-manifests/etcd-defrag.yaml | 7 +++ 16 files changed, 327 insertions(+), 16 deletions(-) rename .github/workflows/{publish-artifacts.yaml => publish-charts.yaml} (68%) create mode 100644 charts/cluster-addons/templates/etcd-defrag.yaml create mode 100644 charts/etcd-defrag/Chart.yaml create mode 100644 charts/etcd-defrag/README.md create mode 100644 charts/etcd-defrag/templates/_helpers.tpl create mode 100644 charts/etcd-defrag/templates/cronjob.yaml create mode 100644 charts/etcd-defrag/templates/role.yaml create mode 100644 charts/etcd-defrag/templates/rolebinding.yaml create mode 100644 charts/etcd-defrag/templates/serviceaccount.yaml create mode 100644 charts/etcd-defrag/values.yaml create mode 100644 skopeo-manifests/etcd-defrag.yaml diff --git a/.github/actions/upgrade-and-test/action.yml 
b/.github/actions/upgrade-and-test/action.yml index 2c67969..a78fb53 100644 --- a/.github/actions/upgrade-and-test/action.yml +++ b/.github/actions/upgrade-and-test/action.yml @@ -1,8 +1,8 @@ name: Upgrade and test cluster description: >- - Run a Helm upgrade using the specified values, wait for the cluster to - become ready and run Sonobuoy against it + Run a Helm upgrade using the specified chart version and values, wait for + the cluster to become ready and run Sonobuoy against it inputs: name: @@ -16,10 +16,17 @@ inputs: description: The name of the cloud within the OpenStack clouds file required: true default: openstack - chart-directory: - description: The directory containing the chart + chart-repo: + description: The repository to fetch the charts from + required: true + default: https://stackhpc.github.io/capi-helm-charts + chart-name: + description: The name of the chart to use + required: true + default: openstack-cluster + chart-version: + description: The version of the charts to use required: true - default: charts/openstack-cluster values-path: description: The path to a file containing Helm values required: true @@ -48,14 +55,12 @@ inputs: runs: using: "composite" steps: - - name: Update dependencies for chart - shell: bash - run: helm dependency update ${{ inputs.chart-directory }} - - name: Install or upgrade cluster from directory shell: bash run: |- - helm upgrade ${{ inputs.name }} ${{ inputs.chart-directory }} \ + helm upgrade ${{ inputs.name }} ${{ inputs.chart-name }} \ + --repo ${{ inputs.chart-repo }} \ + --version ${{ inputs.chart-version }} \ --install \ --values ${{ inputs.os-client-config-file }} \ --values ${{ inputs.values-path }} \ diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index de21690..8dd5058 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -28,13 +28,22 @@ jobs: with: ref: ${{ github.sha }} + publish_charts: + needs: [lint] + uses: ./.github/workflows/publish-charts.yaml 
+ secrets: inherit + with: + ref: ${{ github.sha }} + test: - needs: [mirror_container_images, ensure_capi_images] + needs: [mirror_container_images, ensure_capi_images, publish_charts] uses: ./.github/workflows/test.yaml secrets: inherit with: # Pass the images as JSON images: ${{ toJSON(needs.ensure_capi_images.outputs) }} + # Pass the chart version to test + chart-version: ${{ needs.publish_charts.outputs.chart-version }} # We want to test the current sha ref: ${{ github.sha }} # Only run the sanity check on main diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 99c5afd..36bd5f3 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -44,13 +44,22 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} + publish_charts: + needs: [lint] + uses: ./.github/workflows/publish-charts.yaml + secrets: inherit + with: + ref: ${{ github.event.pull_request.head.sha }} + test: - needs: [mirror_container_images, ensure_capi_images] + needs: [mirror_container_images, ensure_capi_images, publish_charts] uses: ./.github/workflows/test.yaml secrets: inherit with: # Pass the images as JSON images: ${{ toJSON(needs.ensure_capi_images.outputs) }} + # Pass the chart version to test + chart-version: ${{ needs.publish_charts.outputs.chart-version }} # We want to test the code in the PR ref: ${{ github.event.pull_request.head.sha }} # If the PR is in draft, just run a sanity check diff --git a/.github/workflows/publish-artifacts.yaml b/.github/workflows/publish-charts.yaml similarity index 68% rename from .github/workflows/publish-artifacts.yaml rename to .github/workflows/publish-charts.yaml index f697eed..bafc1e7 100644 --- a/.github/workflows/publish-artifacts.yaml +++ b/.github/workflows/publish-charts.yaml @@ -1,15 +1,26 @@ name: publish artifacts on: - push: + workflow_call: + inputs: + ref: + type: string + description: The Git ref under test. 
+ required: true + outputs: + chart-version: + value: ${{ jobs.build_push_charts.outputs.chart-version }} jobs: - build_push_chart: + build_push_charts: name: Build and push Helm charts runs-on: ubuntu-latest + outputs: + chart-version: ${{ steps.semver.outputs.version }} steps: - name: Check out the repository uses: actions/checkout@v3 with: + ref: ${{ inputs.ref }} # This is important for the semver action to work correctly # when determining the number of commits since the last tag fetch-depth: 0 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d2632c8..ce1aa56 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -5,6 +5,9 @@ on: images: type: string description: JSON-encoded dictionary of images and versions + chart-version: + type: string + description: The version of the charts to test ref: type: string description: The Git ref under test. @@ -79,6 +82,7 @@ jobs: uses: ./.github/actions/upgrade-and-test with: name: ci-${{ github.run_id }}-${{ github.job }} + chart-version: ${{ inputs.chart-version }} kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }} image-id: ${{ fromJson(inputs.images).kube-1-29-image }} sonobuoy-mode: ${{ inputs.tests-full && 'certified-conformance' || 'quick' }} @@ -146,6 +150,7 @@ jobs: uses: ./.github/actions/upgrade-and-test with: name: ci-${{ github.run_id }}-${{ github.job }} + chart-version: ${{ inputs.chart-version }} kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }} image-id: ${{ fromJson(inputs.images).kube-1-29-image }} @@ -210,6 +215,7 @@ jobs: uses: ./.github/actions/upgrade-and-test with: name: ci-${{ github.run_id }}-${{ github.job }} + chart-version: ${{ inputs.chart-version }} kubernetes-version: ${{ fromJson(inputs.images).kube-1-27-version }} image-id: ${{ fromJson(inputs.images).kube-1-27-image }} @@ -217,6 +223,7 @@ jobs: uses: ./.github/actions/upgrade-and-test with: name: ci-${{ github.run_id }}-${{ github.job }} + 
chart-version: ${{ inputs.chart-version }} kubernetes-version: ${{ fromJson(inputs.images).kube-1-28-version }} image-id: ${{ fromJson(inputs.images).kube-1-28-image }} @@ -224,6 +231,7 @@ jobs: uses: ./.github/actions/upgrade-and-test with: name: ci-${{ github.run_id }}-${{ github.job }} + chart-version: ${{ inputs.chart-version }} kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }} image-id: ${{ fromJson(inputs.images).kube-1-29-image }} @@ -319,8 +327,9 @@ jobs: - name: Deploy cluster with chart from latest tag uses: ./current/.github/actions/upgrade-and-test with: - chart-directory: latest-tag/charts/openstack-cluster name: ci-${{ github.run_id }}-${{ github.job }} + # Deploy using the tagged version here + chart-version: ${{ steps.latest-tag.outputs.tag-name }} kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }} image-id: ${{ fromJson(inputs.images).kube-1-29-image }} @@ -332,8 +341,9 @@ jobs: - name: Upgrade cluster to current chart uses: ./current/.github/actions/upgrade-and-test with: - chart-directory: current/charts/openstack-cluster name: ci-${{ github.run_id }}-${{ github.job }} + # And upgrade to the version under test + chart-version: ${{ inputs.chart-version }} kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }} image-id: ${{ fromJson(inputs.images).kube-1-29-image }} diff --git a/charts/cluster-addons/templates/etcd-defrag.yaml b/charts/cluster-addons/templates/etcd-defrag.yaml new file mode 100644 index 0000000..4e37cbc --- /dev/null +++ b/charts/cluster-addons/templates/etcd-defrag.yaml @@ -0,0 +1,35 @@ +{{- if .Values.etcdDefrag.enabled }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "cluster-addons.componentName" (list . "etcd-defrag") }}-config + labels: + {{- include "cluster-addons.componentLabels" (list . 
"etcd-defrag") | nindent 4 }} + addons.stackhpc.com/watch: "" +stringData: + overrides: | + {{- toYaml .Values.etcdDefrag.release.values | nindent 4 }} +--- +apiVersion: addons.stackhpc.com/v1alpha1 +kind: HelmRelease +metadata: + name: {{ include "cluster-addons.componentName" (list . "etcd-defrag") }} + labels: {{ include "cluster-addons.componentLabels" (list . "etcd-defrag") | nindent 4 }} + annotations: + # Tell Argo to ignore the non-controller owner references for this object + argocd.argoproj.io/sync-options: "ControllerReferencesOnly=true" +spec: + clusterName: {{ include "cluster-addons.clusterName" . }} + bootstrap: true + chart: + repo: {{ .Values.etcdDefrag.chart.repo }} + name: {{ .Values.etcdDefrag.chart.name }} + version: {{ default .Chart.Version .Values.etcdDefrag.chart.version }} + targetNamespace: {{ .Values.etcdDefrag.release.namespace }} + releaseName: etcd-defrag + valuesSources: + - secret: + name: {{ include "cluster-addons.componentName" (list . "etcd-defrag") }}-config + key: overrides +{{- end }} diff --git a/charts/cluster-addons/values.yaml b/charts/cluster-addons/values.yaml index 58334fc..343a01c 100644 --- a/charts/cluster-addons/values.yaml +++ b/charts/cluster-addons/values.yaml @@ -114,6 +114,19 @@ openstack: name: k8s-keystone-auth version: 0.0.9 +# Settings for etcd defragmentation jobs +etcdDefrag: + # Indicates if the etcd defragmentation job should be enabled + enabled: true + chart: + repo: https://stackhpc.github.io/capi-helm-charts + name: etcd-defrag + version: # Defaults to the same version as this chart + release: + # This should be the namespace in which the etcd pods are deployed + namespace: kube-system + values: {} + # Settings for the metrics server # https://github.com/kubernetes-sigs/metrics-server#helm-chart metricsServer: diff --git a/charts/etcd-defrag/Chart.yaml b/charts/etcd-defrag/Chart.yaml new file mode 100644 index 0000000..dbb9f12 --- /dev/null +++ b/charts/etcd-defrag/Chart.yaml @@ -0,0 +1,7 @@ 
+apiVersion: v2 +name: etcd-defrag +description: >- + Helm chart for deploying a cronjob to do etcd defragmentation on a kubeadm cluster. +type: application +version: 0.1.0 +appVersion: main diff --git a/charts/etcd-defrag/README.md b/charts/etcd-defrag/README.md new file mode 100644 index 0000000..9795374 --- /dev/null +++ b/charts/etcd-defrag/README.md @@ -0,0 +1,7 @@ +# etcd-defrag chart + +This chart installs a [CronJob](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/) +that will periodically defragment the etcd keyspace for a cluster that is managed using +[kubeadm](https://kubernetes.io/docs/reference/setup-tools/kubeadm/). + +It is installed as part of the [cluster-addons](../cluster-addons). diff --git a/charts/etcd-defrag/templates/_helpers.tpl b/charts/etcd-defrag/templates/_helpers.tpl new file mode 100644 index 0000000..cb721db --- /dev/null +++ b/charts/etcd-defrag/templates/_helpers.tpl @@ -0,0 +1,58 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "etcd-defrag.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "etcd-defrag.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "etcd-defrag.chart" -}} +{{- + printf "%s-%s" .Chart.Name .Chart.Version | + replace "+" "_" | + trunc 63 | + trimSuffix "-" | + trimSuffix "." 
| + trimSuffix "_" +}} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "etcd-defrag.labels" -}} +helm.sh/chart: {{ include "etcd-defrag.chart" . }} +{{ include "etcd-defrag.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "etcd-defrag.selectorLabels" -}} +app.kubernetes.io/name: {{ include "etcd-defrag.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/charts/etcd-defrag/templates/cronjob.yaml b/charts/etcd-defrag/templates/cronjob.yaml new file mode 100644 index 0000000..76efd66 --- /dev/null +++ b/charts/etcd-defrag/templates/cronjob.yaml @@ -0,0 +1,59 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ include "etcd-defrag.fullname" . }} + labels: {{ include "etcd-defrag.labels" . | nindent 4 }} +spec: + schedule: {{ .Values.schedule | quote }} + startingDeadlineSeconds: {{ .Values.startingDeadlineSeconds }} + # Prevent the next job from stomping on one that hasn't finished yet + concurrencyPolicy: Forbid + jobTemplate: + spec: + backoffLimit: {{ .Values.backoffLimit }} + activeDeadlineSeconds: {{ .Values.activeDeadlineSeconds }} + template: + spec: + restartPolicy: Never + serviceAccountName: {{ include "etcd-defrag.fullname" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: {{ toYaml . 
| nindent 12 }} + {{- end }} + securityContext: {{ toYaml .Values.podSecurityContext | nindent 12 }} + containers: + - name: {{ .Chart.Name }} + image: {{ + .Values.image.tag | + default (printf "%s.%s" .Capabilities.KubeVersion.Major .Capabilities.KubeVersion.Minor) | + printf "%s:%s" .Values.image.repository + }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + # We run the defrag by execing into one of the running etcd pods + command: + - bash + - -c + - | + set -e + POD_NAME="$( + kubectl get pod \ + --namespace {{ .Release.Namespace }} \ + --selector component=etcd --field-selector status.phase=Running \ + --output go-template='{{ "{{" }}(index .items 0).metadata.name{{ "}}" }}' + )" + kubectl exec --namespace {{ .Release.Namespace }} "$POD_NAME" -- \ + etcdctl defrag \ + --cluster \ + --cacert /etc/kubernetes/pki/etcd/ca.crt \ + --cert /etc/kubernetes/pki/etcd/server.crt \ + --key /etc/kubernetes/pki/etcd/server.key + securityContext: {{ toYaml .Values.securityContext | nindent 16 }} + resources: {{ toYaml .Values.resources | nindent 16 }} + {{- with .Values.nodeSelector }} + nodeSelector: {{ toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.affinity }} + affinity: {{ toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: {{ toYaml . | nindent 12 }} + {{- end }} diff --git a/charts/etcd-defrag/templates/role.yaml b/charts/etcd-defrag/templates/role.yaml new file mode 100644 index 0000000..67fbad7 --- /dev/null +++ b/charts/etcd-defrag/templates/role.yaml @@ -0,0 +1,22 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "etcd-defrag.fullname" . }} + labels: {{ include "etcd-defrag.labels" . 
| nindent 4 }} +rules: + # We need to be able to list pods and to exec + - apiGroups: + - "" + resources: + - pods + verbs: + - list + - watch + - get + - apiGroups: + - "" + resources: + - pods/exec + verbs: + - get + - create diff --git a/charts/etcd-defrag/templates/rolebinding.yaml b/charts/etcd-defrag/templates/rolebinding.yaml new file mode 100644 index 0000000..2120968 --- /dev/null +++ b/charts/etcd-defrag/templates/rolebinding.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "etcd-defrag.fullname" . }} + labels: {{ include "etcd-defrag.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + namespace: {{ .Release.Namespace }} + name: {{ include "etcd-defrag.fullname" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "etcd-defrag.fullname" . }} diff --git a/charts/etcd-defrag/templates/serviceaccount.yaml b/charts/etcd-defrag/templates/serviceaccount.yaml new file mode 100644 index 0000000..59337b2 --- /dev/null +++ b/charts/etcd-defrag/templates/serviceaccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "etcd-defrag.fullname" . }} + labels: {{ include "etcd-defrag.labels" . | nindent 4 }} diff --git a/charts/etcd-defrag/values.yaml b/charts/etcd-defrag/values.yaml new file mode 100644 index 0000000..ed6fa8c --- /dev/null +++ b/charts/etcd-defrag/values.yaml @@ -0,0 +1,41 @@ +# The schedule for the cronjob (defaults to nightly) +schedule: "0 0 * * *" +# Allow the jobs to start up to 12 hours after the configured time +# If it does not start within this time, just wait for the next one +startingDeadlineSeconds: 43200 + +# Abandon the defrag after three retries or one hour, whichever is sooner +backoffLimit: 3 +activeDeadlineSeconds: 3600 + +# The kubectl image to use +image: + repository: bitnami/kubectl + pullPolicy: IfNotPresent + tag: "" # Defaults to the Kubernetes minor version, e.g. 
1.28 + +imagePullSecrets: [] + +# Pod-level security context +podSecurityContext: + runAsNonRoot: true + +# Container-level security context +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: [ALL] + readOnlyRootFilesystem: true + +# Resources for the kubectl container +resources: {} + +# Scheduling parameters for the kubectl pod +nodeSelector: {} +# Allow the pods to run on control plane nodes if they need to +tolerations: + - key: node-role.kubernetes.io/master + effect: NoSchedule + - key: node-role.kubernetes.io/control-plane + effect: NoSchedule +affinity: {} diff --git a/skopeo-manifests/etcd-defrag.yaml b/skopeo-manifests/etcd-defrag.yaml new file mode 100644 index 0000000..f3a46d5 --- /dev/null +++ b/skopeo-manifests/etcd-defrag.yaml @@ -0,0 +1,7 @@ +docker.io: + images: + bitnami/kubectl: + - "1.26" + - "1.27" + - "1.28" + - "1.29"