{{- if and .Values.addons.enabled .Values.addons.nvidiaGPUOperator.enabled }}
---
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "openstack-cluster.componentName" (list . "nvidia-gpu-operator") }}-config
  labels:
    {{- include "openstack-cluster.componentLabels" (list . "nvidia-gpu-operator") | nindent 4 }}
    {{ .Values.addons.watchLabel }}: ""
stringData:
  defaults: |
    # Use the shared NFD (node-feature-discovery) deployment rather than the
    # copy bundled with the GPU operator
    nfd:
      enabled: false
    # Export operator and node metrics in a Prometheus format.
    # The component provides information on the status of the
    # operator (e.g. reconciliation status, number of GPU-enabled nodes).
    nodeStatusExporter:
      enabled: true
    toolkit:
      # Allowing the toolkit to edit /etc/containerd/config.toml (the default)
      # breaks NVIDIA pod deployment on clusters with a Harbor cache enabled.
      # Instead, write a new config file specifically for the NVIDIA runtime
      # config, which is parsed as an "include" in the main containerd config
      # file.
      #
      # https://github.com/NVIDIA/gpu-operator/issues/301
      env:
        - name: "CONTAINERD_CONFIG"
          value: "/etc/containerd/conf.d/nvidia.toml"
  overrides: |
    {{- toYaml .Values.addons.nvidiaGPUOperator.release.values | nindent 4 }}
---
apiVersion: addons.stackhpc.com/v1alpha1
kind: HelmRelease
metadata:
  name: {{ include "openstack-cluster.componentName" (list . "nvidia-gpu-operator") }}
  labels: {{ include "openstack-cluster.componentLabels" (list . "nvidia-gpu-operator") | nindent 4 }}
spec:
  clusterName: {{ include "openstack-cluster.clusterName" . }}
  bootstrap: true
  chart: {{ toYaml .Values.addons.nvidiaGPUOperator.chart | nindent 4 }}
  targetNamespace: {{ .Values.addons.nvidiaGPUOperator.release.namespace }}
  releaseName: nvidia-gpu-operator
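  # Release values come from the two keys of the config Secret above:
  # the chart-managed "defaults" followed by the user-supplied "overrides".
  # Listing "overrides" last is assumed to let user values take precedence.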
  valuesSources:
    - secret:
        name: {{ include "openstack-cluster.componentName" (list . "nvidia-gpu-operator") }}-config
        key: defaults
    - secret:
        name: {{ include "openstack-cluster.componentName" (list . "nvidia-gpu-operator") }}-config
        key: overrides
{{- end }}