integ: gpu-operator chart upgrade 1.6.0 -> 1.8.1
This upgrade is needed to support the A100 GPU, the kernel upgrade, and bug 1948050. It eliminates the requirement to create an NVIDIA-specific runtimeclass prior to installing the charts by pre-installing the toolkit through the toolkit-installer subchart. This commit has been tested with the following: driver: 470.57.02 toolkit: 1.7.1-ubi8 defaultRuntime: containerd Test Plan: PASS: Verify gpu-operator starts and adds nvidia.com/gpu to the node. PASS: Verify nvidia-toolkit is removed with helm override of global.toolkit_force_clean=true. PASS: Verify pods can access gpu device and nvidia tools to monitor the GPU. PASS: Verify pod can build and execute cuda sample code. PASS: Verify driver pod prints out warning when building on Low Latency kernel with helm override of: --set driver.env[0].name=IGNORE_PREEMPT_RT_PRESENCE Closes-Bug: 1948050 Signed-off-by: Babak Sarashki <babak.sarashki@windriver.com> Change-Id: I18dd2a0ab1adc6f9364314a22373aadc93cad27f
This commit is contained in:
parent
f1955eff93
commit
fabc6822a0
@ -92,5 +92,5 @@ xxHash-1f40c6511fa8dd9d2e337ca8c9bc18b3e87663c9.tar.gz#xxHash#https://api.github
|
|||||||
zstd-b706286adbba780006a47ef92df0ad7a785666b6.tar.gz#zstd#https://api.github.com/repos/facebook/zstd/tarball/b706286adbba780006a47ef92df0ad7a785666b6#https##
|
zstd-b706286adbba780006a47ef92df0ad7a785666b6.tar.gz#zstd#https://api.github.com/repos/facebook/zstd/tarball/b706286adbba780006a47ef92df0ad7a785666b6#https##
|
||||||
inih-b1dbff4b0bd1e1f40d237e21011f6dee0ec2fa69.tar.gz#inih-44#https://github.com/benhoyt/inih/tarball/b1dbff4b0bd1e1f40d237e21011f6dee0ec2fa69#https##
|
inih-b1dbff4b0bd1e1f40d237e21011f6dee0ec2fa69.tar.gz#inih-44#https://github.com/benhoyt/inih/tarball/b1dbff4b0bd1e1f40d237e21011f6dee0ec2fa69#https##
|
||||||
pf-bb-config-d7d5f1ddd17b4c80e3e0d6ce87660926f58f8585.tar.gz#pf-bb-config-21.6#https://github.com/intel/pf-bb-config/tarball/d7d5f1ddd17b4c80e3e0d6ce87660926f58f8585#https##
|
pf-bb-config-d7d5f1ddd17b4c80e3e0d6ce87660926f58f8585.tar.gz#pf-bb-config-21.6#https://github.com/intel/pf-bb-config/tarball/d7d5f1ddd17b4c80e3e0d6ce87660926f58f8585#https##
|
||||||
gpu-operator-1.6.0.tar.gz#gpu-operator-1.6.0#https://github.com/NVIDIA/gpu-operator/archive/1.6.0.tar.gz##https##
|
gpu-operator-1.8.1.tar.gz#gpu-operator-1.8.1#https://github.com/NVIDIA/gpu-operator/archive/v1.8.1.tar.gz##https##
|
||||||
containernetworking-plugins-v0.9.1.tar.gz#containernetworking-plugins-v0.9.1#https://github.com/containernetworking/plugins/archive/refs/tags/v0.9.1.tar.gz#https##
|
containernetworking-plugins-v0.9.1.tar.gz#containernetworking-plugins-v0.9.1#https://github.com/containernetworking/plugins/archive/refs/tags/v0.9.1.tar.gz#https##
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
VERSION=1.6.0
|
VERSION=1.8.1
|
||||||
TAR_NAME=gpu-operator
|
TAR_NAME=gpu-operator
|
||||||
TAR="$TAR_NAME-$VERSION.tar.gz"
|
TAR="$TAR_NAME-$VERSION.tar.gz"
|
||||||
COPY_LIST=" \
|
COPY_LIST=" \
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
Summary: StarlingX nvidia gpu-operator helm chart
|
Summary: StarlingX nvidia gpu-operator helm chart
|
||||||
Name: gpu-operator
|
Name: gpu-operator
|
||||||
Version: 1.6.0
|
Version: 1.8.1
|
||||||
Release: 0%{?_tis_dist}.%{tis_patch_ver}
|
Release: 0%{?_tis_dist}.%{tis_patch_ver}
|
||||||
License: Apache-2.0
|
License: Apache-2.0
|
||||||
Group: base
|
Group: base
|
||||||
@ -31,11 +31,15 @@ StarlingX port of NVIDIA gpu-operator
|
|||||||
%patch02 -p1
|
%patch02 -p1
|
||||||
|
|
||||||
%build
|
%build
|
||||||
cp -r assets deployments/gpu-operator/assets
|
mkdir -p deployments/gpu-operator/assets/state-driver/
|
||||||
|
mkdir -p deployments/gpu-operator/assets/state-operator-validation/
|
||||||
|
cp assets/state-driver/0500_daemonset.yaml \
|
||||||
|
deployments/gpu-operator/assets/state-driver/0500_daemonset.yaml
|
||||||
|
cp assets/state-operator-validation/0500_daemonset.yaml \
|
||||||
|
deployments/gpu-operator/assets/state-operator-validation/0500_daemonset.yaml
|
||||||
helm lint deployments/gpu-operator
|
helm lint deployments/gpu-operator
|
||||||
mkdir build_results
|
mkdir build_results
|
||||||
helm package --version %{helm_ver}-%{version}.%{tis_patch_ver} --app-version %{version} -d build_results deployments/gpu-operator
|
helm package --version %{helm_ver}-%{version}.%{tis_patch_ver} --app-version v%{version} -d build_results deployments/gpu-operator
|
||||||
|
|
||||||
%install
|
%install
|
||||||
install -d -m 755 ${RPM_BUILD_ROOT}%{helm_folder}
|
install -d -m 755 ${RPM_BUILD_ROOT}%{helm_folder}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
From b968c69971a195aba4e0c03e8a70df074c128f69 Mon Sep 17 00:00:00 2001
|
From 1094b6f1593ec454b3a6313ecf9fae53f8c66899 Mon Sep 17 00:00:00 2001
|
||||||
From: Babak Sarashki <babak.sarashki@windriver.com>
|
From: Babak Sarashki <babak.sarashki@windriver.com>
|
||||||
Date: Sat, 6 Mar 2021 00:22:40 +0000
|
Date: Sat, 6 Mar 2021 00:22:40 +0000
|
||||||
Subject: [PATCH 1/2] deployments: setup configmap with assets for volumemounts
|
Subject: [PATCH 1/2] deployments: setup configmap with assets for volumemounts
|
||||||
@ -8,17 +8,17 @@ export to the gpu-operator pod through configmap volumeMounts.
|
|||||||
|
|
||||||
Signed-off-by: Babak Sarashki <babak.sarashki@windriver.com>
|
Signed-off-by: Babak Sarashki <babak.sarashki@windriver.com>
|
||||||
---
|
---
|
||||||
.../gpu-operator/templates/operator.yaml | 45 +++++++++++++++++++
|
.../gpu-operator/templates/operator.yaml | 44 +++++++++++++++++++
|
||||||
.../templates/operator_configmap.yaml | 36 +++++++++++++++
|
.../templates/operator_configmap.yaml | 36 +++++++++++++++
|
||||||
deployments/gpu-operator/values.yaml | 2 +
|
deployments/gpu-operator/values.yaml | 2 +
|
||||||
3 files changed, 83 insertions(+)
|
3 files changed, 82 insertions(+)
|
||||||
create mode 100644 deployments/gpu-operator/templates/operator_configmap.yaml
|
create mode 100644 deployments/gpu-operator/templates/operator_configmap.yaml
|
||||||
|
|
||||||
diff --git a/deployments/gpu-operator/templates/operator.yaml b/deployments/gpu-operator/templates/operator.yaml
|
diff --git a/deployments/gpu-operator/templates/operator.yaml b/deployments/gpu-operator/templates/operator.yaml
|
||||||
index 50983b20..1dfd9dbc 100644
|
index 1d81f74..c97b4b1 100644
|
||||||
--- a/deployments/gpu-operator/templates/operator.yaml
|
--- a/deployments/gpu-operator/templates/operator.yaml
|
||||||
+++ b/deployments/gpu-operator/templates/operator.yaml
|
+++ b/deployments/gpu-operator/templates/operator.yaml
|
||||||
@@ -50,6 +50,45 @@ spec:
|
@@ -49,6 +49,44 @@ spec:
|
||||||
- name: host-os-release
|
- name: host-os-release
|
||||||
mountPath: "/host-etc/os-release"
|
mountPath: "/host-etc/os-release"
|
||||||
readOnly: true
|
readOnly: true
|
||||||
@ -60,11 +60,10 @@ index 50983b20..1dfd9dbc 100644
|
|||||||
+ subPath: {{ printf "state_monitor_%s" (base $path) }}
|
+ subPath: {{ printf "state_monitor_%s" (base $path) }}
|
||||||
+ {{- end }}
|
+ {{- end }}
|
||||||
+ {{- end }}
|
+ {{- end }}
|
||||||
+
|
livenessProbe:
|
||||||
readinessProbe:
|
httpGet:
|
||||||
exec:
|
path: /healthz
|
||||||
command: ["stat", "/tmp/operator-sdk-ready"]
|
@@ -72,6 +110,12 @@ spec:
|
||||||
@@ -63,6 +102,12 @@ spec:
|
|
||||||
- name: host-os-release
|
- name: host-os-release
|
||||||
hostPath:
|
hostPath:
|
||||||
path: "/etc/os-release"
|
path: "/etc/os-release"
|
||||||
@ -79,7 +78,7 @@ index 50983b20..1dfd9dbc 100644
|
|||||||
{{- toYaml . | nindent 8 }}
|
{{- toYaml . | nindent 8 }}
|
||||||
diff --git a/deployments/gpu-operator/templates/operator_configmap.yaml b/deployments/gpu-operator/templates/operator_configmap.yaml
|
diff --git a/deployments/gpu-operator/templates/operator_configmap.yaml b/deployments/gpu-operator/templates/operator_configmap.yaml
|
||||||
new file mode 100644
|
new file mode 100644
|
||||||
index 00000000..61f366e8
|
index 0000000..61f366e
|
||||||
--- /dev/null
|
--- /dev/null
|
||||||
+++ b/deployments/gpu-operator/templates/operator_configmap.yaml
|
+++ b/deployments/gpu-operator/templates/operator_configmap.yaml
|
||||||
@@ -0,0 +1,36 @@
|
@@ -0,0 +1,36 @@
|
||||||
@ -120,18 +119,18 @@ index 00000000..61f366e8
|
|||||||
+{{- end }}
|
+{{- end }}
|
||||||
+{{- end }}
|
+{{- end }}
|
||||||
diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml
|
diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml
|
||||||
index 00d94195..8b43c59f 100644
|
index 78a4757..6689636 100644
|
||||||
--- a/deployments/gpu-operator/values.yaml
|
--- a/deployments/gpu-operator/values.yaml
|
||||||
+++ b/deployments/gpu-operator/values.yaml
|
+++ b/deployments/gpu-operator/values.yaml
|
||||||
@@ -39,6 +39,8 @@ operator:
|
@@ -70,6 +70,8 @@ operator:
|
||||||
values: [""]
|
values: [""]
|
||||||
logging:
|
logging:
|
||||||
timeEncoding: epoch
|
timeEncoding: epoch
|
||||||
+ # Set to "include_assets" to include assets/gpu-operator with the helm chart
|
+ # Set "include_assets" true to include assets/gpu-operator with the helm chart
|
||||||
+ include_assets: ""
|
+ include_assets: ""
|
||||||
|
resources:
|
||||||
driver:
|
limits:
|
||||||
repository: nvcr.io/nvidia
|
cpu: 500m
|
||||||
--
|
--
|
||||||
2.17.1
|
2.17.1
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user