integ: gpu-operator chart upgrade 1.6.0 -> 1.8.1
This upgrade is needed to support the A100 GPU, the kernel upgrade, and bug 1948050. It eliminates the requirement to create an NVIDIA-specific runtimeclass prior to installing the charts by pre-installing the toolkit through the toolkit-installer subchart. This commit has been tested with the following: driver: 470.57.02 toolkit: 1.7.1-ubi8 defaultRuntime: containerd Test Plan: PASS: Verify gpu-operator starts and adds nvidia.com/gpu to the node. PASS: Verify nvidia-toolkit is removed with helm override of global.toolkit_force_clean=true. PASS: Verify pods can access the gpu device and nvidia tools to monitor the GPU. PASS: Verify pod can build and execute cuda sample code. PASS: Verify driver pod prints a warning when building on a Low Latency kernel with helm override of: --set driver.env[0].name=IGNORE_PREEMPT_RT_PRESENCE Closes-Bug: 1948050 Signed-off-by: Babak Sarashki <babak.sarashki@windriver.com> Change-Id: I18dd2a0ab1adc6f9364314a22373aadc93cad27f
This commit is contained in:
parent
f1955eff93
commit
fabc6822a0
@ -92,5 +92,5 @@ xxHash-1f40c6511fa8dd9d2e337ca8c9bc18b3e87663c9.tar.gz#xxHash#https://api.github
|
||||
zstd-b706286adbba780006a47ef92df0ad7a785666b6.tar.gz#zstd#https://api.github.com/repos/facebook/zstd/tarball/b706286adbba780006a47ef92df0ad7a785666b6#https##
|
||||
inih-b1dbff4b0bd1e1f40d237e21011f6dee0ec2fa69.tar.gz#inih-44#https://github.com/benhoyt/inih/tarball/b1dbff4b0bd1e1f40d237e21011f6dee0ec2fa69#https##
|
||||
pf-bb-config-d7d5f1ddd17b4c80e3e0d6ce87660926f58f8585.tar.gz#pf-bb-config-21.6#https://github.com/intel/pf-bb-config/tarball/d7d5f1ddd17b4c80e3e0d6ce87660926f58f8585#https##
|
||||
gpu-operator-1.6.0.tar.gz#gpu-operator-1.6.0#https://github.com/NVIDIA/gpu-operator/archive/1.6.0.tar.gz##https##
|
||||
gpu-operator-1.8.1.tar.gz#gpu-operator-1.8.1#https://github.com/NVIDIA/gpu-operator/archive/v1.8.1.tar.gz##https##
|
||||
containernetworking-plugins-v0.9.1.tar.gz#containernetworking-plugins-v0.9.1#https://github.com/containernetworking/plugins/archive/refs/tags/v0.9.1.tar.gz#https##
|
||||
|
@ -1,4 +1,4 @@
|
||||
VERSION=1.6.0
|
||||
VERSION=1.8.1
|
||||
TAR_NAME=gpu-operator
|
||||
TAR="$TAR_NAME-$VERSION.tar.gz"
|
||||
COPY_LIST=" \
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
Summary: StarlingX nvidia gpu-operator helm chart
|
||||
Name: gpu-operator
|
||||
Version: 1.6.0
|
||||
Version: 1.8.1
|
||||
Release: 0%{?_tis_dist}.%{tis_patch_ver}
|
||||
License: Apache-2.0
|
||||
Group: base
|
||||
@ -31,11 +31,15 @@ StarlingX port of NVIDIA gpu-operator
|
||||
%patch02 -p1
|
||||
|
||||
%build
|
||||
cp -r assets deployments/gpu-operator/assets
|
||||
|
||||
mkdir -p deployments/gpu-operator/assets/state-driver/
|
||||
mkdir -p deployments/gpu-operator/assets/state-operator-validation/
|
||||
cp assets/state-driver/0500_daemonset.yaml \
|
||||
deployments/gpu-operator/assets/state-driver/0500_daemonset.yaml
|
||||
cp assets/state-operator-validation/0500_daemonset.yaml \
|
||||
deployments/gpu-operator/assets/state-operator-validation/0500_daemonset.yaml
|
||||
helm lint deployments/gpu-operator
|
||||
mkdir build_results
|
||||
helm package --version %{helm_ver}-%{version}.%{tis_patch_ver} --app-version %{version} -d build_results deployments/gpu-operator
|
||||
helm package --version %{helm_ver}-%{version}.%{tis_patch_ver} --app-version v%{version} -d build_results deployments/gpu-operator
|
||||
|
||||
%install
|
||||
install -d -m 755 ${RPM_BUILD_ROOT}%{helm_folder}
|
||||
|
@ -1,4 +1,4 @@
|
||||
From b968c69971a195aba4e0c03e8a70df074c128f69 Mon Sep 17 00:00:00 2001
|
||||
From 1094b6f1593ec454b3a6313ecf9fae53f8c66899 Mon Sep 17 00:00:00 2001
|
||||
From: Babak Sarashki <babak.sarashki@windriver.com>
|
||||
Date: Sat, 6 Mar 2021 00:22:40 +0000
|
||||
Subject: [PATCH 1/2] deployments: setup configmap with assets for volumemounts
|
||||
@ -8,17 +8,17 @@ export to the gpu-operator pod through configmap volumeMounts.
|
||||
|
||||
Signed-off-by: Babak Sarashki <babak.sarashki@windriver.com>
|
||||
---
|
||||
.../gpu-operator/templates/operator.yaml | 45 +++++++++++++++++++
|
||||
.../gpu-operator/templates/operator.yaml | 44 +++++++++++++++++++
|
||||
.../templates/operator_configmap.yaml | 36 +++++++++++++++
|
||||
deployments/gpu-operator/values.yaml | 2 +
|
||||
3 files changed, 83 insertions(+)
|
||||
3 files changed, 82 insertions(+)
|
||||
create mode 100644 deployments/gpu-operator/templates/operator_configmap.yaml
|
||||
|
||||
diff --git a/deployments/gpu-operator/templates/operator.yaml b/deployments/gpu-operator/templates/operator.yaml
|
||||
index 50983b20..1dfd9dbc 100644
|
||||
index 1d81f74..c97b4b1 100644
|
||||
--- a/deployments/gpu-operator/templates/operator.yaml
|
||||
+++ b/deployments/gpu-operator/templates/operator.yaml
|
||||
@@ -50,6 +50,45 @@ spec:
|
||||
@@ -49,6 +49,44 @@ spec:
|
||||
- name: host-os-release
|
||||
mountPath: "/host-etc/os-release"
|
||||
readOnly: true
|
||||
@ -60,11 +60,10 @@ index 50983b20..1dfd9dbc 100644
|
||||
+ subPath: {{ printf "state_monitor_%s" (base $path) }}
|
||||
+ {{- end }}
|
||||
+ {{- end }}
|
||||
+
|
||||
readinessProbe:
|
||||
exec:
|
||||
command: ["stat", "/tmp/operator-sdk-ready"]
|
||||
@@ -63,6 +102,12 @@ spec:
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
@@ -72,6 +110,12 @@ spec:
|
||||
- name: host-os-release
|
||||
hostPath:
|
||||
path: "/etc/os-release"
|
||||
@ -79,7 +78,7 @@ index 50983b20..1dfd9dbc 100644
|
||||
{{- toYaml . | nindent 8 }}
|
||||
diff --git a/deployments/gpu-operator/templates/operator_configmap.yaml b/deployments/gpu-operator/templates/operator_configmap.yaml
|
||||
new file mode 100644
|
||||
index 00000000..61f366e8
|
||||
index 0000000..61f366e
|
||||
--- /dev/null
|
||||
+++ b/deployments/gpu-operator/templates/operator_configmap.yaml
|
||||
@@ -0,0 +1,36 @@
|
||||
@ -120,18 +119,18 @@ index 00000000..61f366e8
|
||||
+{{- end }}
|
||||
+{{- end }}
|
||||
diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml
|
||||
index 00d94195..8b43c59f 100644
|
||||
index 78a4757..6689636 100644
|
||||
--- a/deployments/gpu-operator/values.yaml
|
||||
+++ b/deployments/gpu-operator/values.yaml
|
||||
@@ -39,6 +39,8 @@ operator:
|
||||
@@ -70,6 +70,8 @@ operator:
|
||||
values: [""]
|
||||
logging:
|
||||
timeEncoding: epoch
|
||||
+ # Set to "include_assets" to include assets/gpu-operator with the helm chart
|
||||
+ # Set "include_assets" true to include assets/gpu-operator with the helm chart
|
||||
+ include_assets: ""
|
||||
|
||||
driver:
|
||||
repository: nvcr.io/nvidia
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
--
|
||||
2.17.1
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user