# The name of the Kubernetes cluster we are deploying to
# Defaults to the release name if not given, for use as a dependency of openstack-cluster
clusterName: "{{ .Release.Name }}"
# The Kubernetes version of the target cluster
# This is treated as a template at rendering time
kubernetesVersion: v1.22
# Indicates whether the addons are being deployed as part of a Cluster API cluster
# If true, the addons will wait for the cluster to become ready before installing, except
# for the bootstrap addons, which just wait for the API to become available
clusterApi: false
# Details of a secret containing a kubeconfig file for a remote cluster
# If given, this is used in preference to a service account
kubeconfigSecret:
  # The name of the secret
  # This is treated as a template during rendering
  name:
  # The key of the kubeconfig file in the secret
  key: value
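  # Illustrative example only: when these addons are deployed alongside a Cluster API
  # cluster, the kubeconfig secret created by Cluster API is conventionally named
  # "<cluster name>-kubeconfig" with the kubeconfig itself under the "value" key, e.g.:
  #   kubeconfigSecret:
  #     name: "{{ .Values.clusterName }}-kubeconfig"
  #     key: value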
# Options for the service account to use
# A pre-existing service account can be used, or a new one can be created
#
# A service account is always required, as it is used by the pre-delete hook
# to suspend any install jobs that are still running before the deletion proceeds
#
# The permissions required by the service account depend on whether the installation
# is targeting a remote cluster or the local cluster
#
# Whether the installation target is local or remote, the service account needs
# permission to list and patch jobs in the release namespace so that the delete hook
# can suspend any running install jobs
#
# When the installation targets the local cluster, the service account must also have
# permission to create any resources that need to be installed, which may be in
# other namespaces - the cluster-admin cluster role is normally used for this
serviceAccount:
  # Indicates whether to create a new service account
  create: true
  # The name of the cluster role to bind the created service account to
  clusterRoleName: cluster-admin
  # The name of the service account
  # If create = true, this is the name of the created service account
  # If create = false, this is the name of an existing service account to use
  # This is treated as a template during rendering
  name: "{{ include \"cluster-addons.fullname\" . }}-deployer"
# Default settings for jobs
jobDefaults:
  image:
    repository: ghcr.io/stackhpc/k8s-utils
    tag:  # Defaults to chart appVersion if not given
    pullPolicy: IfNotPresent
  imagePullSecrets: []
  backoffLimit: 1000
  activeDeadlineSeconds: 3600
  podSecurityContext:
    runAsNonRoot: true
  securityContext:
    allowPrivilegeEscalation: false
  resources: {}
  hostNetwork: false
  tolerations: []
  nodeSelector: {}
  affinity: {}
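  # Illustrative example only: any of the defaults above can be overridden per deployment,
  # for instance to constrain the resources and placement of the install jobs
  # (the values below are examples, not recommendations):
  #   jobDefaults:
  #     resources:
  #       requests:
  #         cpu: 100m
  #         memory: 128Mi
  #     tolerations:
  #       - key: "node-role.kubernetes.io/master"
  #         operator: "Exists"
  #         effect: "NoSchedule"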
# The available categories for dependencies and the addons that belong to them
categories:
  bootstrap:
    - cloud-config
    - ccm-openstack
    - cni-calico
    - cni-cilium
    - prometheus-operator-crds
  storage: [csi-cinder]
  ingress: [ingress-nginx]
# Settings for the CNI addon
cni:
  # Indicates if a CNI should be deployed
  enabled: true
  # The type of CNI to deploy - supported values are calico or cilium
  type: calico
  # Settings for the Calico CNI
  calico:
    chart:
      repo: https://projectcalico.docs.tigera.io/charts
      name: tigera-operator
      version: v3.23.3
    release:
      namespace: tigera-operator
      # See https://projectcalico.docs.tigera.io/getting-started/kubernetes/helm
      values:
        # Managing the installation separately makes deriving the pod CIDR cleaner
        installation:
          enabled: false
    # The spec of the Calico installation
    # See https://projectcalico.docs.tigera.io/reference/installation/api
    installation:
      calicoNetwork:
        # By default, disable BGP
        bgp: Disabled
        # Use the interface that holds the Kubernetes internal IP
        nodeAddressAutodetectionV4:
          kubernetes: NodeInternalIP
        # Use a single IP pool with VXLAN
        # The special variable __KUBEADM_POD_CIDR__ is replaced with the pod CIDR from the
        # kubeadm configmap, if kubeadm is in use
        ipPools:
          - cidr: __KUBEADM_POD_CIDR__
            encapsulation: VXLAN
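        # Illustrative example only: if kubeadm is not in use, the placeholder above will
        # likely not be resolvable, so the pool CIDR can instead be set explicitly via an
        # overrides file (10.244.0.0/16 below is just an example value):
        #   ipPools:
        #     - cidr: 10.244.0.0/16
        #       encapsulation: VXLAN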
  # Settings for the Cilium CNI
  cilium:
    chart:
      repo: https://helm.cilium.io/
      name: cilium
      version: 1.11.1
    release:
      namespace: kube-system
      # See https://docs.cilium.io/en/stable/gettingstarted/k8s-install-helm/ for details
      values:
        ipam:
          mode: kubernetes
# Settings for the OpenStack integrations
openstack:
  # Indicates if the OpenStack integrations should be enabled
  enabled: false
  # The version of the OpenStack cloud provider to install
  # By default, use the release branch for the Kubernetes version of the target cluster
  version: release-{{ tpl .Values.kubernetesVersion . | trimPrefix "v" }}
  # The base URL for OpenStack cloud provider manifests
  # By default, pull the manifests from GitHub at the specified version
  manifestsBaseURL: https://raw.githubusercontent.com/kubernetes/cloud-provider-openstack/{{ tpl .Values.openstack.version . }}
  # The name of a secret containing a clouds.yaml file and optional cacert
  # If the cacert is present, it should be referred to in the clouds.yaml file as /etc/config/cacert
  # See https://docs.openstack.org/openstacksdk/latest/user/config/configuration.html#ssl-settings
  cloudCredentialsSecretName:
  # The name of the cloud to use in the clouds.yaml
  cloudName: openstack
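  # Illustrative example only (the secret key names are assumptions based on the comment
  # above): a matching credentials secret might look like
  #   apiVersion: v1
  #   kind: Secret
  #   metadata:
  #     name: openstack-cloud-credentials
  #   stringData:
  #     clouds.yaml: |
  #       clouds:
  #         openstack:
  #           auth:
  #             auth_url: https://keystone.example.com:5000
  #             application_credential_id: "<id>"
  #             application_credential_secret: "<secret>"
  #           auth_type: v3applicationcredential
  #           cacert: /etc/config/cacert
  #     cacert: |
  #       <PEM-encoded CA bundle>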
  # cloud-config options for the OpenStack integrations
  # The [Global] section is configured to use the cloud specified by cloudName in the credentials secret
  # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md#config-openstack-cloud-controller-manager
  # and https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/cinder-csi-plugin/using-cinder-csi-plugin.md#block-storage
  cloudConfig:
    # By default, ignore volume AZs for Cinder as most clouds have a single globally-attachable Cinder AZ
    BlockStorage:
      ignore-volume-az: true
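    # Illustrative example only: other sections from the cloud-provider-openstack docs
    # linked above can be added alongside BlockStorage, e.g. LoadBalancer options for
    # the Octavia integration (the values below are placeholders):
    #   LoadBalancer:
    #     floating-network-id: "<external network UUID>"
    #     create-monitor: true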
  # Settings for the Cloud Controller Manager (CCM)
  ccm:
    # Indicates if the OpenStack CCM should be enabled
    # By default, the CCM is enabled if the OpenStack integrations are enabled
    enabled: true
    # The prefix for RBAC manifests
    # Unfortunately, this changes for different Kubernetes versions
    rbacManifestsPrefix: >-
      {{
        tpl .Values.kubernetesVersion . |
          trimPrefix "v" |
          semverCompare ">=1.22" |
          ternary "manifests/controller-manager" "cluster/addons/rbac"
      }}
    # The URLs to use for the manifests
    manifests:
      - "{{ tpl .Values.openstack.manifestsBaseURL . }}/{{ tpl .Values.openstack.ccm.rbacManifestsPrefix . }}/cloud-controller-manager-roles.yaml"
      - "{{ tpl .Values.openstack.manifestsBaseURL . }}/{{ tpl .Values.openstack.ccm.rbacManifestsPrefix . }}/cloud-controller-manager-role-bindings.yaml"
      - "{{ tpl .Values.openstack.manifestsBaseURL . }}/manifests/controller-manager/openstack-cloud-controller-manager-ds.yaml"
    # Any kustomization to apply to the OpenStack CCM manifests
    kustomization: {}
  # Settings for the Cinder CSI plugin
  csiCinder:
    # Indicates if the Cinder CSI should be enabled
    # By default, it is enabled if the OpenStack integrations are enabled
    enabled: true
    # The URLs to use for the manifests
    manifests:
      - "{{ tpl .Values.openstack.manifestsBaseURL . }}/manifests/cinder-csi-plugin/cinder-csi-controllerplugin-rbac.yaml"
      - "{{ tpl .Values.openstack.manifestsBaseURL . }}/manifests/cinder-csi-plugin/cinder-csi-controllerplugin.yaml"
      - "{{ tpl .Values.openstack.manifestsBaseURL . }}/manifests/cinder-csi-plugin/cinder-csi-nodeplugin-rbac.yaml"
      - "{{ tpl .Values.openstack.manifestsBaseURL . }}/manifests/cinder-csi-plugin/cinder-csi-nodeplugin.yaml"
      - "{{ tpl .Values.openstack.manifestsBaseURL . }}/manifests/cinder-csi-plugin/csi-cinder-driver.yaml"
    # Any kustomization to apply to the Cinder CSI manifests
    kustomization: {}
    # Variables affecting the definition of the storage class
    storageClass:
      # Indicates if the storage class should be enabled
      enabled: true
      # The name of the storage class
      name: csi-cinder
      # Indicates if the storage class should be annotated as the default storage class
      isDefault: true
      # The reclaim policy for the storage class
      reclaimPolicy: Delete
      # Indicates if volume expansion is allowed
      allowVolumeExpansion: true
      # The Cinder availability zone to use for volumes provisioned by the storage class
      availabilityZone: nova
      # The Cinder volume type to use for volumes provisioned by the storage class
      # If not given, the default volume type will be used
      volumeType:
      # The allowed topologies for the storage class
      allowedTopologies:
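      # Illustrative example only: allowedTopologies uses the standard StorageClass
      # format; the Cinder CSI plugin is generally understood to label nodes with the
      # topology.cinder.csi.openstack.org/zone key, e.g.:
      #   allowedTopologies:
      #     - matchLabelExpressions:
      #         - key: topology.cinder.csi.openstack.org/zone
      #           values:
      #             - nova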
# Settings for the metrics server
metricsServer:
  # Indicates if the metrics server should be deployed
  enabled: true
  # The version of the metrics server to deploy
  version: v0.6.1
  # The URLs of the metrics server manifests
  manifests:
    - https://github.com/kubernetes-sigs/metrics-server/releases/download/{{ .Values.metricsServer.version }}/components.yaml
  # Any kustomization to be applied to the metrics server manifests
  kustomization:
    patches:
      - patch: |-
          - op: add
            path: /spec/template/spec/containers/0/args/-
            value: --kubelet-insecure-tls
        target:
          kind: Deployment
          name: metrics-server
# Settings for the Kubernetes dashboard
kubernetesDashboard:
  # Indicates if the Kubernetes dashboard should be enabled
  enabled: false
  chart:
    repo: https://kubernetes.github.io/dashboard
    name: kubernetes-dashboard
    version: 5.3.1
  release:
    namespace: kubernetes-dashboard
    values:
      # Enable the metrics scraper by default
      metricsScraper:
        enabled: true
# Settings for cert-manager
certManager:
  # Indicates if cert-manager should be enabled
  enabled: false
  chart:
    repo: https://charts.jetstack.io
    name: cert-manager
    version: v1.5.5
  release:
    namespace: cert-manager
    # See https://cert-manager.io/docs/installation/helm/ for available values
    values:
      # By default, make sure the cert-manager CRDs are installed
      installCRDs: true
      # Disable Prometheus support for now
      prometheus:
        enabled: false
  # Settings for automatic ACME HTTP01 support using Let's Encrypt
  # This is only enabled if ingress is also enabled
  acmeHttp01Issuer:
    enabled: true
    name: letsencrypt-http01
    server: https://acme-v02.api.letsencrypt.org/directory
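    # Illustrative example only: assuming the issuer is created as a ClusterIssuer, an
    # Ingress can then request a certificate with the standard cert-manager annotation
    # (use cert-manager.io/issuer instead for a namespaced Issuer):
    #   metadata:
    #     annotations:
    #       cert-manager.io/cluster-issuer: letsencrypt-http01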
# Settings for ingress controllers
ingress:
  # Indicates if ingress controllers should be enabled
  enabled: false
  # Settings for the Nginx ingress controller
  nginx:
    # Indicates if the Nginx ingress controller should be enabled
    enabled: true
    chart:
      repo: https://kubernetes.github.io/ingress-nginx
      name: ingress-nginx
      version: 4.0.18
    release:
      namespace: ingress-nginx
      # See https://github.com/kubernetes/ingress-nginx/tree/main/charts/ingress-nginx#configuration
      values: {}
# Settings for cluster monitoring
monitoring:
  # Indicates if the cluster monitoring should be enabled
  enabled: false
  prometheusOperatorCrds:
    - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
    - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
    - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
    - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
    - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
    - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
    - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
    - https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
  kubePrometheusStack:
    chart:
      repo: https://prometheus-community.github.io/helm-charts
      name: kube-prometheus-stack
      version: 34.6.0
    release:
      namespace: monitoring-system
      values: {}
  lokiStack:
    enabled: true
    chart:
      repo: https://grafana.github.io/helm-charts
      name: loki-stack
      version: 2.6.1
    release:
      namespace: monitoring-system
      values: {}
# Settings for node feature discovery
nodeFeatureDiscovery:
  # Indicates if node feature discovery should be enabled
  enabled: true
  chart:
    repo: https://kubernetes-sigs.github.io/node-feature-discovery/charts
    name: node-feature-discovery
    version: 0.11.0
  release:
    namespace: node-feature-discovery
    values:
      master:
        extraLabelNs:
          - nvidia.com
      worker:
        # Allow the NFD pods to be scheduled on master nodes
        tolerations:
          - key: "node-role.kubernetes.io/master"
            operator: "Equal"
            value: ""
            effect: "NoSchedule"
          - key: "nvidia.com/gpu"
            operator: "Equal"
            value: "present"
            effect: "NoSchedule"
        # We want to be able to identify nodes with high-performance hardware
        # So the whitelisted device classes are:
        #   02   - Network Controllers (e.g. Ethernet, Infiniband)
        #   03   - Display Controllers (e.g. GPUs)
        #   0b40 - Co-processors
        #   12   - Processing Accelerators (e.g. specialised AI inference chips)
        config:
          sources:
            pci:
              deviceClassWhitelist:
                - "02"
                - "03"
                - "0b40"
                - "12"
              deviceLabelFields:
                - vendor
# Settings for the NVIDIA GPU operator
nvidiaGPUOperator:
  # Indicates if the NVIDIA GPU operator should be enabled
  # Note that because it uses node feature discovery to run only on nodes
  # with an NVIDIA GPU available, the overhead of enabling this on clusters
  # that do not need it now but may need it in the future is low
  enabled: true
  chart:
    repo: https://nvidia.github.io/gpu-operator
    name: gpu-operator
    version: v1.10.0
  release:
    namespace: gpu-operator
    values:
      # Use the shared NFD
      nfd:
        enabled: false
      # Export operator and node metrics in a Prometheus format
      # The component provides information on the status of the
      # operator (e.g. reconciliation status, number of GPU-enabled nodes)
      nodeStatusExporter:
        enabled: true
      toolkit:
        # Allowing the toolkit to edit /etc/containerd/config.toml (the default)
        # breaks NVIDIA pod deployment on clusters with a Harbor cache enabled
        # Instead, write a separate config file specifically for the NVIDIA runtime,
        # which is parsed as an "include" in the main containerd config file
        #
        # https://github.com/NVIDIA/gpu-operator/issues/301
        env:
          - name: "CONTAINERD_CONFIG"
            value: "/etc/containerd/conf.d/nvidia.toml"
# Settings for the Mellanox network operator
mellanoxNetworkOperator:
  # Indicates if the network operator should be enabled
  # Note that because it uses node feature discovery to run only on nodes
  # with a Mellanox NIC available, the overhead of enabling this on clusters
  # that do not need it now but may need it in the future is low
  enabled: true
  chart:
    repo: https://mellanox.github.io/network-operator
    name: network-operator
    version: 1.1.0
  release:
    namespace: network-operator
    values:
      # Use the shared NFD
      nfd:
        enabled: false
      # Deploy the default NICClusterPolicy
      deployCR: true
      # Deploy the OFED driver onto nodes with a suitable NIC
      ofedDriver:
        deploy: true
        # OFED takes ages to deploy on low-resource nodes
        # The startup probe has a fixed failure threshold of 60
        # So in order to give the drivers up to one hour to install, we use a period
        # of 60 seconds for the startup probe
        startupProbe:
          initialDelaySeconds: 60
          periodSeconds: 60
      # Deploy the RDMA shared device plugin to allow pods to access the RDMA device
      rdmaSharedDevicePlugin:
        deploy: true
      # Disable all other features for now
      sriovNetworkOperator:
        enabled: false
      sriovDevicePlugin:
        deploy: false
      secondaryNetwork:
        deploy: false
# Map of extra addons in the form "component name" -> "addon spec"
extraAddons: {}
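# Illustrative example only - the exact addon spec schema is defined by this chart's
# addon templates, and the field names below are assumptions for illustration:
#   extraAddons:
#     my-custom-addon:
#       # Addons this addon should wait for (assumed field)
#       dependsOn: [cni]
#       # How the addon is installed, e.g. via custom install/delete scripts (assumed field)
#       installType: custom
#       custom:
#         install: |
#           kubectl apply -f https://example.com/my-addon.yaml
#         delete: |
#           kubectl delete -f https://example.com/my-addon.yaml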