init commit
This commit is contained in:
22
clusters/k3s-dgx/apps/huihui-granite-inference.yaml
Normal file
22
clusters/k3s-dgx/apps/huihui-granite-inference.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
# KServe InferenceService serving the Huihui Granite model through the
# "huggingface" model format, with weights pulled from the Hugging Face hub.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  namespace: kserve
  name: huihui-granite
spec:
  predictor:
    model:
      modelFormat:
        name: huggingface
      storageUri: 'hf://huihui-ai/Huihui-granite-4.1-30b-abliterated'
      args:
        - '--model_name=huihui-granite'
      resources:
        # GPU request and limit must be equal for extended resources.
        requests:
          cpu: '2'
          memory: 8Gi
          nvidia.com/gpu: '1'
        # NOTE(review): the storageUri names a 30b model; confirm a 16Gi
        # memory limit is enough to load it.
        limits:
          cpu: '4'
          memory: 16Gi
          nvidia.com/gpu: '1'
|
||||
5
clusters/k3s-dgx/apps/kustomization.yaml
Normal file
5
clusters/k3s-dgx/apps/kustomization.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
# Kustomize entry point for application workloads; every resource listed
# here is placed in the kserve namespace.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
namespace: kserve
resources:
  - huihui-granite-inference.yaml
|
||||
50
clusters/k3s-dgx/flux-system/gotk-components.yaml
Normal file
50
clusters/k3s-dgx/flux-system/gotk-components.yaml
Normal file
@@ -0,0 +1,50 @@
|
||||
# Flux base objects: the flux-system namespace, the helm-controller service
# account with its cluster-admin binding, and shared Helm chart sources.
apiVersion: v1
kind: Namespace
metadata:
  name: flux-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: helm-controller
  namespace: flux-system
---
# Grants helm-controller cluster-admin so Helm releases can create
# cluster-scoped objects.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cluster-reconciler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: helm-controller
    namespace: flux-system
---
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: bitnami
  namespace: flux-system
spec:
  interval: 30m
  url: https://charts.bitnami.com/bitnami
---
# KServe publishes its charts as OCI artifacts, not as an HTTP Helm index,
# so the source must be an OCI-type repository.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: kserve
  namespace: flux-system
spec:
  type: oci
  interval: 30m
  url: oci://ghcr.io/kserve/charts
---
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: nvidia
  namespace: flux-system
spec:
  interval: 30m
  url: https://nvidia.github.io/k8s-device-plugin
|
||||
34
clusters/k3s-dgx/flux-system/gotk-sync.yaml
Normal file
34
clusters/k3s-dgx/flux-system/gotk-sync.yaml
Normal file
@@ -0,0 +1,34 @@
|
||||
# Git source that Flux polls for this cluster's manifests (branch main,
# authenticated via the flux-system secret).
kind: GitRepository
apiVersion: source.toolkit.fluxcd.io/v1
metadata:
  namespace: flux-system
  name: flux-system
spec:
  url: ssh://git@gitea.example.com/edge-gitops/edge-gitops.git
  secretRef:
    name: flux-system
  ref:
    branch: main
  interval: 1m0s
---
# Reconciles ./clusters/k3s-dgx from the Git source above, pruning objects
# that were removed from the repository.
kind: Kustomization
apiVersion: kustomize.toolkit.fluxcd.io/v1
metadata:
  namespace: flux-system
  name: flux-system
spec:
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./clusters/k3s-dgx
  interval: 10m0s
  prune: true
  # Reconciliation is reported healthy only once these Deployments are ready.
  healthChecks:
    - kind: Deployment
      apiVersion: apps/v1
      namespace: flux-system
      name: kustomize-controller
    - kind: Deployment
      apiVersion: apps/v1
      namespace: flux-system
      name: helm-controller
|
||||
13
clusters/k3s-dgx/flux-system/kustomization.yaml
Normal file
13
clusters/k3s-dgx/flux-system/kustomization.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# Kustomize overlay for the Flux installation: bundles the component and
# sync manifests and raises the reconcile worker count on the controllers.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - gotk-components.yaml
  - gotk-sync.yaml
patches:
  # JSON6902 patch appended to the first container's args of every matched
  # Deployment. The Flux controller flag is spelled --concurrent; an
  # unrecognised flag such as --concurrency makes the controller exit on
  # startup.
  - patch: |
      - op: add
        path: /spec/template/spec/containers/0/args/-
        value: --concurrent=20
    target:
      kind: Deployment
      name: "(kustomize-controller|helm-controller|notification-controller|source-controller)"
|
||||
13
clusters/k3s-dgx/gpu-support/gpu-node-labels.yaml
Normal file
13
clusters/k3s-dgx/gpu-support/gpu-node-labels.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# ConfigMap carrying a YAML list of key/value label pairs for GPU nodes.
# NOTE(review): nothing in this file shows what consumes labels.yaml;
# confirm the reader before relying on these labels being applied.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: gpu-operator
  name: gpu-node-labels
data:
  labels.yaml: |
    - key: accelerator
      value: nvidia-tesla
    - key: nvidia.com/gpu.present
      value: "true"
    - key: topology.kubernetes.io/zone
      value: "dgx-spark"
|
||||
70
clusters/k3s-dgx/gpu-support/gpu-operator-helmrelease.yaml
Normal file
70
clusters/k3s-dgx/gpu-support/gpu-operator-helmrelease.yaml
Normal file
@@ -0,0 +1,70 @@
|
||||
# Chart source for the NVIDIA GPU Operator. Current operator charts are
# published on the NGC Helm repository.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: nvidia
  namespace: gpu-operator
spec:
  interval: 10m
  url: https://helm.ngc.nvidia.com/nvidia
---
# Installs the GPU Operator (driver, container toolkit, device plugin,
# DCGM exporter, MIG manager, GPU feature discovery, NFD) on the k3s node.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: gpu-operator
  namespace: gpu-operator
spec:
  interval: 10m
  chart:
    spec:
      chart: gpu-operator
      version: "v23.9.1"
      sourceRef:
        kind: HelmRepository
        name: nvidia
        namespace: gpu-operator
  values:
    # The chart composes image references as <repository>/<image>:<version>,
    # so the registry path belongs in `repository`, not in `image`.
    driver:
      # NOTE(review): confirm the host does not already ship a preinstalled
      # NVIDIA driver; if it does, this should be disabled.
      enabled: true
      repository: nvcr.io/nvidia
      image: driver
      version: "535.129.03"
    operator:
      # Accepted values are docker, crio and containerd; k3s runs containerd.
      defaultRuntime: containerd
    toolkit:
      enabled: true
      # No image/version override: the previous values pointed the toolkit at
      # the k8s-device-plugin image and tag, which is a different component.
      # The chart's default container-toolkit image is used instead.
      env:
        # k3s keeps containerd's config and socket in non-default locations.
        - name: CONTAINERD_CONFIG
          value: /var/lib/rancher/k3s/agent/etc/containerd/config.toml
        - name: CONTAINERD_SOCKET
          value: /run/k3s/containerd/containerd.sock
        - name: CONTAINERD_RUNTIME_CLASS
          value: nvidia
    devicePlugin:
      enabled: true
      repository: nvcr.io/nvidia
      image: k8s-device-plugin
      version: "v0.14.0"
      resources:
        requests:
          cpu: 100m
          memory: 100Mi
        limits:
          cpu: 500m
          memory: 500Mi
    # The metrics exporter is configured under `dcgmExporter`; the `dcgm` key
    # controls the standalone DCGM host engine instead.
    dcgmExporter:
      enabled: true
      repository: nvcr.io/nvidia/k8s
      image: dcgm-exporter
      version: "3.3.3-3.1.0-ubuntu22.04"
      resources:
        requests:
          cpu: 100m
          memory: 100Mi
        limits:
          cpu: 500m
          memory: 500Mi
    migManager:
      enabled: true
    gfd:
      enabled: true
    # Node Feature Discovery is toggled via `nfd.enabled`; keys under
    # `node-feature-discovery` are passed to the subchart as its own values.
    nfd:
      enabled: true
|
||||
6
clusters/k3s-dgx/gpu-support/gpu-operator-namespace.yaml
Normal file
6
clusters/k3s-dgx/gpu-support/gpu-operator-namespace.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# Namespace that hosts the GPU Operator and its operands.
kind: Namespace
apiVersion: v1
metadata:
  name: gpu-operator
  labels:
    # NOTE(review): this is an OpenShift monitoring label; confirm it is
    # wanted on a k3s cluster.
    openshift.io/cluster-monitoring: 'true'
|
||||
7
clusters/k3s-dgx/gpu-support/kustomization.yaml
Normal file
7
clusters/k3s-dgx/gpu-support/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
# Kustomize entry point for GPU enablement; all listed resources are placed
# in the gpu-operator namespace.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
namespace: gpu-operator
resources:
  - gpu-operator-namespace.yaml
  - gpu-operator-helmrelease.yaml
  - gpu-node-labels.yaml
|
||||
39
clusters/k3s-dgx/gpu-support/nvidia-device-plugin.yaml
Normal file
39
clusters/k3s-dgx/gpu-support/nvidia-device-plugin.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
# Standalone NVIDIA device plugin: a Namespace plus a DaemonSet that mounts
# the kubelet device-plugin directory from the host.
kind: Namespace
apiVersion: v1
metadata:
  name: gpu-operator
---
kind: DaemonSet
apiVersion: apps/v1
metadata:
  namespace: gpu-operator
  name: nvidia-device-plugin-daemonset
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
    spec:
      # Allows scheduling onto nodes tainted with nvidia.com/gpu:NoSchedule.
      tolerations:
        - effect: NoSchedule
          key: nvidia.com/gpu
          operator: Exists
      containers:
        - name: nvidia-device-plugin-ctr
          image: nvcr.io/nvidia/k8s-device-plugin:v0.14.0
          args:
            - '--fail-on-init-error=false'
          env:
            - name: NVIDIA_VISIBLE_DEVICES
              value: 'all'
            - name: MIG_STRATEGY
              value: 'single'
          volumeMounts:
            - mountPath: /var/lib/kubelet/device-plugins
              name: device-plugin
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
|
||||
16
clusters/k3s-dgx/gpu-support/runtime-class.yaml
Normal file
16
clusters/k3s-dgx/gpu-support/runtime-class.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
# RuntimeClass "nvidia": selects the nvidia runtime handler, declares a fixed
# per-pod overhead, and constrains scheduling to GPU-labelled nodes.
kind: RuntimeClass
apiVersion: node.k8s.io/v1
metadata:
  name: nvidia
handler: nvidia
scheduling:
  nodeSelector:
    nvidia.com/gpu.present: 'true'
  tolerations:
    - effect: NoSchedule
      key: nvidia.com/gpu
      operator: Exists
# Added to every pod's resource accounting when it uses this RuntimeClass.
overhead:
  podFixed:
    cpu: '500m'
    memory: '1Gi'
|
||||
42
clusters/k3s-dgx/kserve/gpu-serving-runtime.yaml
Normal file
42
clusters/k3s-dgx/kserve/gpu-serving-runtime.yaml
Normal file
@@ -0,0 +1,42 @@
|
||||
# Namespace-scoped KServe ServingRuntime with one GPU per replica.
# ServingRuntime is served from serving.kserve.io/v1alpha1 (v1beta1 only
# carries InferenceService), so the API server would reject v1beta1 here.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: kserve-gpu-runtime
  namespace: kserve
spec:
  # Formats for which this runtime is picked automatically (autoSelect).
  supportedModelFormats:
    - name: tensorflow
      version: "2"
      autoSelect: true
    - name: pytorch
      version: "2"
      autoSelect: true
    - name: sklearn
      version: "1"
      autoSelect: true
    - name: xgboost
      version: "1"
      autoSelect: true
  # `protocolVersions` is the schema field for supported inference protocols;
  # the former standalone `protocol` key is not part of ServingRuntimeSpec.
  protocolVersions:
    - v1
    - v2
  containers:
    - name: kserve-container
      # NOTE(review): confirm this image exists and actually serves every
      # format listed above; prefer a pinned tag over `latest`.
      image: kserve/sklearnserver-gpu:latest
      resources:
        requests:
          cpu: "1"
          memory: "2Gi"
          nvidia.com/gpu: "1"
        limits:
          cpu: "4"
          memory: "8Gi"
          nvidia.com/gpu: "1"
      env:
        - name: NVIDIA_VISIBLE_DEVICES
          value: "all"
        - name: NVIDIA_DRIVER_CAPABILITIES
          value: "compute,utility"
  multiModel: false
  disabled: false
|
||||
35
clusters/k3s-dgx/kserve/istio-gateway.yaml
Normal file
35
clusters/k3s-dgx/kserve/istio-gateway.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
# Istio Gateway exposing plain HTTP on port 80 for any host via the
# ingressgateway workload.
kind: Gateway
apiVersion: networking.istio.io/v1beta1
metadata:
  namespace: kserve
  name: kserve-gateway
spec:
  selector:
    istio: ingressgateway
  servers:
    - hosts:
        - '*'
      port:
        name: http
        number: 80
        protocol: HTTP
---
# Routes requests whose path starts with /v1/models/ to port 80 of the
# kserve-default host.
# NOTE(review): no Service named kserve-default is defined in the visible
# manifests; confirm the destination exists.
kind: VirtualService
apiVersion: networking.istio.io/v1beta1
metadata:
  namespace: kserve
  name: kserve-vs
spec:
  gateways:
    - kserve-gateway
  hosts:
    - '*'
  http:
    - match:
        - uri:
            prefix: /v1/models/
      route:
        - destination:
            host: kserve-default
            port:
              number: 80
|
||||
43
clusters/k3s-dgx/kserve/kserve-controller.yaml
Normal file
43
clusters/k3s-dgx/kserve/kserve-controller.yaml
Normal file
@@ -0,0 +1,43 @@
|
||||
# Chart source for KServe. KServe publishes its charts as OCI artifacts on
# ghcr.io rather than as an HTTP Helm index, so an OCI-type repository is
# required for the chart to resolve.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: kserve
  namespace: kserve
spec:
  type: oci
  interval: 10m
  url: oci://ghcr.io/kserve/charts
---
# Installs the KServe controller from the `kserve` chart.
# NOTE(review): the KServe CRDs ship in a separate `kserve-crd` chart;
# confirm they are installed before this release reconciles.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kserve
  namespace: kserve
spec:
  interval: 10m
  chart:
    spec:
      chart: kserve
      version: "v0.12.0"
      sourceRef:
        kind: HelmRepository
        name: kserve
        namespace: kserve
  # NOTE(review): verify these keys against the chart's values.yaml; keys the
  # chart does not define are silently ignored by Helm.
  values:
    controller:
      resources:
        requests:
          cpu: 500m
          memory: 512Mi
        limits:
          cpu: 2
          memory: 2Gi
    config:
      ingress:
        className: istio
      storage:
        initialCapacity: 10Gi
        storageClassName: local-path
    knative:
      enabled: true
    istio:
      enabled: true
|
||||
7
clusters/k3s-dgx/kserve/kserve-namespace.yaml
Normal file
7
clusters/k3s-dgx/kserve/kserve-namespace.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
# Namespace for KServe; the istio-injection label opts its pods into
# automatic Istio sidecar injection.
kind: Namespace
apiVersion: v1
metadata:
  name: kserve
  labels:
    serving.kserve.io/serving-runtime: 'true'
    istio-injection: enabled
|
||||
10
clusters/k3s-dgx/kserve/kustomization.yaml
Normal file
10
clusters/k3s-dgx/kserve/kustomization.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
# Kustomize entry point for the KServe stack; all listed resources are
# placed in the kserve namespace.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
namespace: kserve
resources:
  - kserve-namespace.yaml
  - kserve-controller.yaml
  - istio-gateway.yaml
  # Present in this directory but currently not deployed:
  # - gpu-serving-runtime.yaml
  # - model-storage-pvc.yaml
  # - storage-config.yaml
|
||||
12
clusters/k3s-dgx/kserve/model-storage-pvc.yaml
Normal file
12
clusters/k3s-dgx/kserve/model-storage-pvc.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
# 50Gi single-node (ReadWriteOnce) volume claim on the local-path storage
# class, intended for model artifacts.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: kserve
  name: model-storage-pvc
spec:
  storageClassName: local-path
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
|
||||
20
clusters/k3s-dgx/kserve/storage-config.yaml
Normal file
20
clusters/k3s-dgx/kserve/storage-config.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
# ConfigMap embedding a storage configuration document under the
# storageConfig.yaml key.
# NOTE(review): defaultStorageUri refers to "model-storage"; confirm whether
# that is meant to be the storageSpec entry name or the name of an existing
# PersistentVolumeClaim.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: kserve
  name: storage-config
data:
  storageConfig.yaml: |
    defaultStorageUri: "pvc://model-storage"
    storageSpec:
      - name: model-storage
        type: pvc
        pvcSpec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 50Gi
          storageClassName: local-path
    modelCacheSize: 10Gi
    modelCacheMemory: 2Gi
|
||||
7
clusters/k3s-dgx/kustomization.yaml
Normal file
7
clusters/k3s-dgx/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
# Root Kustomization for the k3s-dgx cluster; aggregates the per-area
# directories below.
# NOTE(review): everything is applied in a single pass; confirm the CRDs
# needed by `kserve` and `apps` exist before those resources are applied.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - flux-system
  - gpu-support
  - kserve
  - apps
|
||||
Reference in New Issue
Block a user