# Patch summary (text before the first "diff --git" line is ignored by
# patch tools):
#
# Adds clusters/k3s-dgx/nim-service/qwen.yaml, a NIMCache resource
# (apps.nvidia.com/v1alpha1) named "qwen2.5-7b-instruct" in the
# "nim-service" namespace.
#   - source.ngc: model puller image nvcr.io/nim/qwen/qwen-2.5-7b-instruct,
#     pulled with the "ngc-secret" pull secret and authenticated with the
#     "ngc-api-secret" auth secret; the model selection asks for the "vllm"
#     engine with tensorParallelism "1" (kept as a quoted string).
#   - storage.pvc: create is true, size "100Gi", access mode ReadWriteOnce.
#
# NOTE(review): line breaks and indentation were reconstructed from a copy
# in which the whole patch had been collapsed onto one line. Nesting follows
# the NIMCache CRD layout; confirm against blob 193bdc3 before applying.
# NOTE(review): modelPuller uses the mutable ":latest" tag, so the cached
# model can change between syncs. Consider pinning a released tag.
# NOTE(review): the file ends with a "---" document separator; presumably
# further documents are appended later. Confirm this is intentional.
diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
new file mode 100644
index 0000000..193bdc3
--- /dev/null
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -0,0 +1,20 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: qwen2.5-7b-instruct
+  namespace: nim-service
+spec:
+  source:
+    ngc:
+      modelPuller: nvcr.io/nim/qwen/qwen-2.5-7b-instruct:latest
+      pullSecret: ngc-secret
+      authSecret: ngc-api-secret
+      model:
+        engine: "vllm"
+        tensorParallelism: "1"
+  storage:
+    pvc:
+      create: true
+      size: "100Gi"
+      volumeAccessMode: ReadWriteOnce
+---