From 98f39b7c68f7d35fafb27db8ab40ab7358f77417 Mon Sep 17 00:00:00 2001
From: HaimKortovich
Date: Wed, 6 May 2026 17:33:57 -0500
Subject: [PATCH] add qwen

---
 clusters/k3s-dgx/nim-service/qwen.yaml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 clusters/k3s-dgx/nim-service/qwen.yaml

diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
new file mode 100644
index 0000000..193bdc3
--- /dev/null
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -0,0 +1,28 @@
+---
+# NIMCache for the Qwen2.5-7B-Instruct NIM, pulled from NGC.
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: qwen2.5-7b-instruct
+  namespace: nim-service
+spec:
+  source:
+    ngc:
+      # NOTE(review): `latest` is a mutable tag; pin a release tag so the
+      # cached model is reproducible.
+      modelPuller: nvcr.io/nim/qwen/qwen-2.5-7b-instruct:latest
+      # Secrets are referenced by name only and are not created here; they
+      # are expected to already exist in the nim-service namespace.
+      pullSecret: ngc-secret
+      authSecret: ngc-api-secret
+      model:
+        engine: "vllm"
+        # Quoted so YAML loads it as a string, not an integer.
+        tensorParallelism: "1"
+  storage:
+    pvc:
+      # No storageClass is set; presumably the cluster default is used --
+      # TODO confirm.
+      create: true
+      size: "100Gi"
+      volumeAccessMode: ReadWriteOnce