add test model

2026-05-06 12:07:59 -05:00
parent 93fc5e52d7
commit df78657374
4 changed files with 56 additions and 0 deletions
--- a/clusters/k3s-dgx/nim-service/kustomization.yaml
+++ b/clusters/k3s-dgx/nim-service/kustomization.yaml
@@ -0,0 +1,6 @@
+apiVersion: kustomize.config.k8s.io/v1beta1  # Kustomize entry point for the nim-service directory
+kind: Kustomization
+namespace: nim-service  # must agree with namespace.yaml and the metadata.namespace in llama-instruct-1b.yaml
+resources:
+  - namespace.yaml  # Namespace "nim-service"
+  - llama-instruct-1b.yaml  # NIMCache + NIMService for llama-3.2-1b-instruct
--- a/clusters/k3s-dgx/nim-service/llama-instruct-1b.yaml
+++ b/clusters/k3s-dgx/nim-service/llama-instruct-1b.yaml
@@ -0,0 +1,45 @@
+apiVersion: apps.nvidia.com/v1alpha1  # NIMCache resource for the llama-3.2-1b-instruct NIM image
+kind: NIMCache
+metadata:
+  namespace: nim-service
+  name: meta-llama-3-2-1b-instruct  # the NIMService in this file refers to this name
+spec:
+  storage:
+    pvc:
+      volumeAccessMode: ReadWriteOnce
+      size: '50Gi'
+      create: true
+  source:
+    ngc:
+      model:
+        tensorParallelism: '1'  # quoted on purpose: the value stays a string, not an integer
+        engine: tensorrt_llm
+      authSecret: ngc-api-secret  # Secret is not defined in the manifests shown here
+      pullSecret: ngc-secret  # Secret is not defined in the manifests shown here
+      modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.12.0
+---
+apiVersion: apps.nvidia.com/v1alpha1  # NIMService for the llama-3.2-1b-instruct NIM image
+kind: NIMService
+metadata:
+  namespace: nim-service
+  name: meta-llama-3-2-1b-instruct
+spec:
+  replicas: 1
+  authSecret: ngc-api-secret  # same Secret name the NIMCache in this file uses
+  image:
+    pullSecrets:
+      - ngc-secret
+    pullPolicy: IfNotPresent
+    tag: '1.12.0'  # same version as the tag on the NIMCache modelPuller image
+    repository: nvcr.io/nim/meta/llama-3.2-1b-instruct
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+  storage:
+    nimCache:
+      profile: ''  # empty string: no profile is named here
+      name: meta-llama-3-2-1b-instruct  # matches metadata.name of the NIMCache in this file
+  expose:
+    service:
+      port: 8000
+      type: ClusterIP
--- a/clusters/k3s-dgx/nim-service/namespace.yaml
+++ b/clusters/k3s-dgx/nim-service/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: nim-service  # namespace that the NIMCache and NIMService in llama-instruct-1b.yaml declare