{{- if and .Values.addons.enabled .Values.addons.nvidiaGPUOperator.enabled }}
---
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "openstack-cluster.componentName" (list . "nvidia-gpu-operator") }}-config
  labels:
    {{- include "openstack-cluster.componentLabels" (list . "nvidia-gpu-operator") | nindent 4 }}
    {{ .Values.addons.watchLabel }}: ""
stringData:
  defaults: |
    # Use the shared NFD (node-feature-discovery) deployment rather than the
    # copy bundled with the GPU operator
    nfd:
      enabled: false
    # Export operator and node metrics in a Prometheus format.
    # The component provides information on the status of the
    # operator (e.g. reconciliation status, number of GPU-enabled nodes).
    nodeStatusExporter:
      enabled: true
    toolkit:
      # Allowing the toolkit to edit /etc/containerd/config.toml (the default)
      # breaks NVIDIA pod deployment on clusters with a Harbor cache enabled.
      # Instead, write a new config file specifically for the NVIDIA runtime
      # config, which is parsed as an "include" in the main containerd config
      # file.
      #
      # https://github.com/NVIDIA/gpu-operator/issues/301
      env:
        - name: "CONTAINERD_CONFIG"
          value: "/etc/containerd/conf.d/nvidia.toml"
  overrides: |
    {{- toYaml .Values.addons.nvidiaGPUOperator.release.values | nindent 4 }}
---
apiVersion: addons.stackhpc.com/v1alpha1
kind: HelmRelease
metadata:
  name: {{ include "openstack-cluster.componentName" (list . "nvidia-gpu-operator") }}
  labels: {{ include "openstack-cluster.componentLabels" (list . "nvidia-gpu-operator") | nindent 4 }}
spec:
  clusterName: {{ include "openstack-cluster.clusterName" . }}
  bootstrap: true
  chart: {{ toYaml .Values.addons.nvidiaGPUOperator.chart | nindent 4 }}
  targetNamespace: {{ .Values.addons.nvidiaGPUOperator.release.namespace }}
  releaseName: nvidia-gpu-operator
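  # Release values come from the two keys of the config Secret above:
  # the chart-managed "defaults" followed by the user-supplied "overrides".
  # Listing "overrides" last is assumed to let user values take precedence.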
  valuesSources:
    - secret:
        name: {{ include "openstack-cluster.componentName" (list . "nvidia-gpu-operator") }}-config
        key: defaults
    - secret:
        name: {{ include "openstack-cluster.componentName" (list . "nvidia-gpu-operator") }}-config
        key: overrides
{{- end }}