init commit

This commit is contained in:
2026-05-05 11:15:49 -05:00
commit 06f52750ac
24 changed files with 1158 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
---
# KServe InferenceService serving the Huihui granite model from Hugging Face
# via the huggingface model format. GPU-backed (1x nvidia.com/gpu).
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: huihui-granite
  namespace: kserve
spec:
  predictor:
    model:
      modelFormat:
        name: huggingface
      args:
        - --model_name=huihui-granite
      # hf:// URI is resolved by the KServe storage initializer
      storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
      resources:
        limits:
          cpu: "4"
          memory: 16Gi
          nvidia.com/gpu: "1"
        requests:
          cpu: "2"
          memory: 8Gi
          nvidia.com/gpu: "1"

View File

@@ -0,0 +1,5 @@
---
# Kustomize entry point for the kserve application overlay.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kserve
resources:
  - huihui-granite-inference.yaml

View File

@@ -0,0 +1,50 @@
---
# Flux bootstrap components: namespace, controller service account,
# cluster-admin binding, and the HelmRepository sources used by releases.
apiVersion: v1
kind: Namespace
metadata:
  name: flux-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: helm-controller
  namespace: flux-system
---
# NOTE(review): cluster-admin for the controller SA is broad; consider a
# narrower ClusterRole if the threat model requires it.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cluster-reconciler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: helm-controller
    namespace: flux-system
---
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: bitnami
  namespace: flux-system
spec:
  interval: 30m
  url: https://charts.bitnami.com/bitnami
---
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: kserve
  namespace: flux-system
spec:
  interval: 30m
  url: https://kserve.github.io/kserve
---
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: nvidia
  namespace: flux-system
spec:
  interval: 30m
  url: https://nvidia.github.io/k8s-device-plugin

View File

@@ -0,0 +1,34 @@
---
# Flux sync objects: the Git source and the root Kustomization that
# reconciles ./clusters/k3s-dgx from it.
apiVersion: source.toolkit.fluxcd.io/v1
kind: GitRepository
metadata:
  name: flux-system
  namespace: flux-system
spec:
  interval: 1m0s
  ref:
    branch: main
  secretRef:
    name: flux-system
  url: ssh://git@gitea.example.com/edge-gitops/edge-gitops.git
---
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: flux-system
  namespace: flux-system
spec:
  interval: 10m0s
  path: ./clusters/k3s-dgx
  # prune removes cluster objects deleted from Git
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: kustomize-controller
      namespace: flux-system
    - apiVersion: apps/v1
      kind: Deployment
      name: helm-controller
      namespace: flux-system

View File

@@ -0,0 +1,13 @@
---
# Kustomization for the flux-system directory; patches all Flux
# controllers to raise reconciliation concurrency.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - gotk-components.yaml
  - gotk-sync.yaml
patches:
  # JSON6902 patch appended to each matched controller's args
  - patch: |
      - op: add
        path: /spec/template/spec/containers/0/args/-
        value: --concurrency=20
    target:
      kind: Deployment
      name: "(kustomize-controller|helm-controller|notification-controller|source-controller)"

View File

@@ -0,0 +1,13 @@
---
# ConfigMap carrying node-label definitions consumed elsewhere
# (NOTE(review): the consumer of labels.yaml is not visible here — confirm).
apiVersion: v1
kind: ConfigMap
metadata:
  name: gpu-node-labels
  namespace: gpu-operator
data:
  labels.yaml: |
    - key: accelerator
      value: nvidia-tesla
    - key: nvidia.com/gpu.present
      value: "true"
    - key: topology.kubernetes.io/zone
      value: "dgx-spark"

View File

@@ -0,0 +1,70 @@
---
# NVIDIA GPU Operator installed via Flux HelmRelease, configured for k3s
# (containerd config/socket paths under /var/lib/rancher/k3s).
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: nvidia
  namespace: gpu-operator
spec:
  interval: 10m
  url: https://nvidia.github.io/gpu-operator
---
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: gpu-operator
  namespace: gpu-operator
spec:
  interval: 10m
  chart:
    spec:
      chart: gpu-operator
      version: "v23.9.1"
      sourceRef:
        kind: HelmRepository
        name: nvidia
        namespace: gpu-operator
  values:
    driver:
      enabled: true
      image: "nvcr.io/nvidia/driver"
      version: "535.129.03"
    operator:
      defaultRuntime: nvidia-container-runtime
    toolkit:
      enabled: true
      # NOTE(review): this is the device-plugin image; the container toolkit
      # usually ships as nvcr.io/nvidia/k8s/container-toolkit — confirm intent.
      image: "nvcr.io/nvidia/k8s-device-plugin"
      version: "v0.14.0"
      env:
        - name: CONTAINERD_CONFIG
          value: /var/lib/rancher/k3s/agent/etc/containerd/config.toml
        - name: CONTAINERD_SOCKET
          value: /run/k3s/containerd/containerd.sock
        - name: CONTAINERD_RUNTIME_CLASS
          value: nvidia
    devicePlugin:
      enabled: true
      image: "nvcr.io/nvidia/k8s-device-plugin"
      version: "v0.14.0"
      resources:
        requests:
          cpu: 100m
          memory: 100Mi
        limits:
          cpu: 500m
          memory: 500Mi
    dcgm:
      enabled: true
      image: "nvcr.io/nvidia/dcgm-exporter"
      version: "3.3.3-3.1.0-ubuntu22.04"
      resources:
        requests:
          cpu: 100m
          memory: 100Mi
        limits:
          cpu: 500m
          memory: 500Mi
    migManager:
      enabled: true
    gfd:
      enabled: true
    node-feature-discovery:
      enabled: true

View File

@@ -0,0 +1,6 @@
---
# Namespace for the GPU operator stack.
apiVersion: v1
kind: Namespace
metadata:
  name: gpu-operator
  labels:
    # NOTE(review): openshift.io label on a k3s cluster looks vestigial — confirm.
    openshift.io/cluster-monitoring: "true"

View File

@@ -0,0 +1,7 @@
---
# Kustomize entry point for the gpu-support directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: gpu-operator
resources:
  - gpu-operator-namespace.yaml
  - gpu-operator-helmrelease.yaml
  - gpu-node-labels.yaml

View File

@@ -0,0 +1,39 @@
---
# Standalone NVIDIA device-plugin DaemonSet.
# NOTE(review): the gpu-operator HelmRelease also deploys a device plugin —
# running both may double-advertise GPUs; confirm which one is intended.
apiVersion: v1
kind: Namespace
metadata:
  name: gpu-operator
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: gpu-operator
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
    spec:
      tolerations:
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
      containers:
        - image: nvcr.io/nvidia/k8s-device-plugin:v0.14.0
          name: nvidia-device-plugin-ctr
          args: ["--fail-on-init-error=false"]
          env:
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: MIG_STRATEGY
              value: "single"
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins

View File

@@ -0,0 +1,16 @@
---
# RuntimeClass routing pods to the nvidia containerd runtime handler,
# with fixed per-pod overhead and GPU-node scheduling constraints.
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: nvidia
handler: nvidia
overhead:
  podFixed:
    memory: "1Gi"
    cpu: "500m"
scheduling:
  nodeSelector:
    nvidia.com/gpu.present: "true"
  tolerations:
    - key: nvidia.com/gpu
      operator: Exists
      effect: NoSchedule

View File

@@ -0,0 +1,42 @@
---
# GPU-enabled KServe ServingRuntime auto-selectable for several model formats.
apiVersion: serving.kserve.io/v1beta1
kind: ServingRuntime
metadata:
  name: kserve-gpu-runtime
  namespace: kserve
spec:
  supportedModelFormats:
    - name: tensorflow
      version: "2"
      autoSelect: true
    - name: pytorch
      version: "2"
      autoSelect: true
    - name: sklearn
      version: "1"
      autoSelect: true
    - name: xgboost
      version: "1"
      autoSelect: true
  # NOTE(review): `protocol` is not a ServingRuntimeSpec field; likely
  # redundant with protocolVersions below — confirm and remove if so.
  protocol: v1
  protocolVersions:
    - v1
    - v2
  containers:
    - name: kserve-container
      # NOTE(review): floating :latest tag; pin a version for reproducibility.
      image: kserve/sklearnserver-gpu:latest
      resources:
        requests:
          cpu: "1"
          memory: "2Gi"
          nvidia.com/gpu: "1"
        limits:
          cpu: "4"
          memory: "8Gi"
          nvidia.com/gpu: "1"
      env:
        - name: NVIDIA_VISIBLE_DEVICES
          value: "all"
        - name: NVIDIA_DRIVER_CAPABILITIES
          value: "compute,utility"
  multiModel: false
  disabled: false

View File

@@ -0,0 +1,35 @@
---
# Istio ingress for KServe: HTTP gateway plus a VirtualService routing
# /v1/models/ traffic to the kserve-default host.
apiVersion: networking.istio.io/v1beta1
kind: Gateway
metadata:
  name: kserve-gateway
  namespace: kserve
spec:
  selector:
    istio: ingressgateway
  servers:
    - port:
        number: 80
        name: http
        protocol: HTTP
      hosts:
        - "*"
---
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
  name: kserve-vs
  namespace: kserve
spec:
  hosts:
    - "*"
  gateways:
    - kserve-gateway
  http:
    - match:
        - uri:
            prefix: /v1/models/
      route:
        - destination:
            # NOTE(review): no Service named kserve-default is visible in this
            # commit — confirm the destination host exists.
            host: kserve-default
            port:
              number: 80

View File

@@ -0,0 +1,43 @@
---
# KServe control plane installed via Flux HelmRelease.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: kserve
  namespace: kserve
spec:
  interval: 10m
  url: https://kserve.github.io/kserve
---
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kserve
  namespace: kserve
spec:
  interval: 10m
  chart:
    spec:
      chart: kserve
      version: "v0.12.0"
      sourceRef:
        kind: HelmRepository
        name: kserve
        namespace: kserve
  values:
    controller:
      resources:
        requests:
          cpu: 500m
          memory: 512Mi
        limits:
          cpu: "2"
          memory: 2Gi
    config:
      ingress:
        className: istio
      storage:
        initialCapacity: 10Gi
        storageClassName: local-path
    knative:
      enabled: true
    istio:
      enabled: true

View File

@@ -0,0 +1,7 @@
---
# KServe namespace with Istio sidecar injection enabled.
apiVersion: v1
kind: Namespace
metadata:
  name: kserve
  labels:
    istio-injection: enabled
    serving.kserve.io/serving-runtime: "true"

View File

@@ -0,0 +1,10 @@
---
# Kustomize entry point for the kserve directory. The commented resources
# are present in the repo but not yet enabled.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kserve
resources:
  - kserve-namespace.yaml
  - kserve-controller.yaml
  - istio-gateway.yaml
  # - gpu-serving-runtime.yaml
  # - model-storage-pvc.yaml
  # - storage-config.yaml

View File

@@ -0,0 +1,12 @@
---
# PVC backing model storage for KServe (single-node local-path storage).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: model-storage-pvc
  namespace: kserve
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
  storageClassName: local-path

View File

@@ -0,0 +1,20 @@
---
# KServe storage configuration embedded as a YAML block scalar.
# NOTE(review): nesting of the embedded document was reconstructed from a
# flattened dump — verify against the consumer's expected schema.
apiVersion: v1
kind: ConfigMap
metadata:
  name: storage-config
  namespace: kserve
data:
  storageConfig.yaml: |
    defaultStorageUri: "pvc://model-storage"
    storageSpec:
      - name: model-storage
        type: pvc
        pvcSpec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 50Gi
          storageClassName: local-path
    modelCacheSize: 10Gi
    modelCacheMemory: 2Gi

View File

@@ -0,0 +1,7 @@
---
# Cluster root Kustomization aggregating all stacks.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - flux-system
  - gpu-support
  - kserve
  - apps