Add a NIMCache named qwen36-27b-fp8 (namespace nim-service) to qwen.yaml,
inserted as its own YAML document ahead of the existing NIMService
qwen3-32b-instruct. The cache sources the model "Qwen3.6-27B-FP8" from the
"Qwen" namespace at https://huggingface.co and requests a newly created
50Gi ReadWriteOnce PVC.
NOTE(review): YAML nesting below was reconstructed from a whitespace-collapsed
copy of this patch; confirm indentation against the target file before applying.

diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
index 6f5c863..4835811 100644
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -21,6 +21,26 @@ spec:
       volumeAccessMode: ReadWriteOnce
 ---
 apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: qwen36-27b-fp8
+  namespace: nim-service
+spec:
+  source:
+    hf:
+      endpoint: "https://huggingface.co"
+      namespace: "Qwen"
+      authSecret: hf-api-secret
+      modelPuller: nvcr.io/nim/nvidia/llm-nim:1.12
+      pullSecret: ngc-secret
+      modelName: "Qwen3.6-27B-FP8"
+  storage:
+    pvc:
+      create: true
+      size: "50Gi"
+      volumeAccessMode: ReadWriteOnce
+---
+apiVersion: apps.nvidia.com/v1alpha1
 kind: NIMService
 metadata:
   name: qwen3-32b-instruct