use qwen

2026-05-06 17:46:54 -05:00
parent 066554aa36
commit da57ec24ee
3 changed files with 48 additions and 69 deletions
--- a/clusters/k3s-dgx/nim-service/kustomization.yaml
+++ b/clusters/k3s-dgx/nim-service/kustomization.yaml
@@ -3,5 +3,4 @@ kind: Kustomization
 namespace: nim-service
 resources:
  - namespace.yaml
-  - llama.yaml
  - qwen.yaml
--- a/clusters/k3s-dgx/nim-service/llama.yaml
+++ b/clusters/k3s-dgx/nim-service/llama.yaml
@@ -1,68 +0,0 @@
-apiVersion: apps.nvidia.com/v1alpha1
-kind: NIMCache
-metadata:
-  name: meta-llama-3-1-8b-instruct
-  namespace: nim-service
-spec:
-  source:
-    ngc:
-      modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:2.0.3
-      pullSecret: ngc-secret
-      authSecret: ngc-api-secret
-      model:
-        engine: "vllm"
-        tensorParallelism: "1"
-  storage:
-    pvc:
-      create: true
-      size: "100Gi"
-      volumeAccessMode: ReadWriteOnce
---
-apiVersion: apps.nvidia.com/v1alpha1
-kind: NIMService
-metadata:
-  name: meta-llama-3-1-8b-instruct
-  namespace: nim-service
-spec:
-  args:
-    - --enable-auto-tool-choice
-    - --tool-call-parser
-    - llama3_json
-  image:
-    repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
-    tag: "2.0.3"
-    pullPolicy: IfNotPresent
-    pullSecrets:
-      - ngc-secret
-  authSecret: ngc-api-secret
-  storage:
-    nimCache:
-      name: meta-llama-3-1-8b-instruct
-  replicas: 1
-  resources:
-    limits:
-      nvidia.com/gpu: 1
-  expose:
-    service:
-      type: ClusterIP
-      port: 8000
---
-apiVersion: gateway.networking.k8s.io/v1
-kind: HTTPRoute
-metadata:
-  name: llm-route
-  namespace: nim-service
-spec:
-  parentRefs:
-    - name: envoy-gateway
-      namespace: default
-  hostnames:
-    - "mcp.corredorconect.com"
-  rules:
-    - matches:
-        - path:
-            type: PathPrefix
-            value: /
-      backendRefs:
-        - name: meta-llama-3-1-8b-instruct
-          port: 8000
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -18,3 +18,51 @@ spec:
      size: "100Gi"
      volumeAccessMode: ReadWriteOnce
 ---
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMService  # serving workload for the Qwen3 32B NIM image configured below
+metadata:
+  name: qwen3-32b-instruct  # matches the backendRef name used by the HTTPRoute below
+  namespace: nim-service
+spec:
+  args:
+    - --enable-auto-tool-choice
+    - --tool-call-parser  # parser name is supplied as the next list item
+    - hermes  # Qwen3 emits Hermes-style <tool_call> blocks; llama3_json cannot parse them
+  image:
+    repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
+    tag: "1.1.0-variant"  # quoted so the tag is always read as a string
+    pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-secret  # registry credentials for nvcr.io
+  authSecret: ngc-api-secret  # presumably holds the NGC API key - confirm
+  storage:
+    nimCache:
+      name: qwen3-32b-instruct  # expects a NIMCache of this name (not visible in this hunk)
+  replicas: 1
+  resources:
+    limits:
+      nvidia.com/gpu: 1  # one GPU per replica
+  expose:
+    service:
+      type: ClusterIP  # in-cluster only; external traffic arrives through the HTTPRoute
+      port: 8000
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute  # routes all HTTP traffic for the hostname below to the Qwen service
+metadata:
+  name: llm-route  # same route name the removed llama.yaml used; only the backend differs
+  namespace: nim-service
+spec:
+  parentRefs:
+    - name: envoy-gateway
+      namespace: default  # NOTE(review): cross-namespace parent; Gateway must allow it - confirm
+  hostnames:
+    - "mcp.corredorconect.com"
+  rules:
+    - matches:
+        - path:
+            type: PathPrefix
+            value: /  # prefix "/" matches every request path
+      backendRefs:
+        - name: qwen3-32b-instruct  # same name as the NIMService defined above
+          port: 8000  # same port the NIMService exposes