use qwen

2026-05-06 17:46:54 -05:00
parent 066554aa36
commit da57ec24ee
3 changed files with 48 additions and 69 deletions
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -18,3 +18,51 @@ spec:
      size: "100Gi"
      volumeAccessMode: ReadWriteOnce
 ---
+apiVersion: apps.nvidia.com/v1alpha1  # NIMService serving the Qwen3-32B image
+kind: NIMService
+metadata:
+  name: qwen3-32b-instruct
+  namespace: nim-service
+spec:
+  args:  # extra command-line flags for the inference server
+    - --enable-auto-tool-choice
+    - --tool-call-parser
+    - hermes  # Qwen3 emits Hermes-style <tool_call> blocks, not Llama 3 JSON
+  image:
+    repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
+    tag: "1.1.0-variant"  # quoted so the tag is always read as a string
+    pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-secret
+  authSecret: ngc-api-secret
+  storage:
+    nimCache:
+      name: qwen3-32b-instruct  # cache is referenced by name
+  replicas: 1
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+  expose:
+    service:
+      type: ClusterIP
+      port: 8000  # must match the port used by the HTTPRoute backendRef
+---
+apiVersion: gateway.networking.k8s.io/v1  # sends the host's traffic to the NIM
+kind: HTTPRoute
+metadata:
+  namespace: nim-service
+  name: llm-route
+spec:
+  hostnames:
+    - mcp.corredorconect.com
+  parentRefs:
+    - namespace: default  # the Gateway is outside this route's namespace
+      name: envoy-gateway
+  rules:
+    - backendRefs:
+        - port: 8000
+          name: qwen3-32b-instruct
+      matches:
+        - path:
+            value: /  # with PathPrefix this matches every request path
+            type: PathPrefix