From 5954bb5202f5d87ed2933e17bcc083cfe81ce8d6 Mon Sep 17 00:00:00 2001
From: HaimKortovich
Date: Thu, 7 May 2026 14:45:23 -0500
Subject: [PATCH] add HF cache

---
 clusters/k3s-dgx/nim-service/qwen.yaml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
index 6f5c863..4835811 100644
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -21,6 +21,26 @@ spec:
       volumeAccessMode: ReadWriteOnce
 ---
 apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: qwen36-27b-fp8
+  namespace: nim-service
+spec:
+  source:
+    hf:
+      endpoint: "https://huggingface.co"
+      namespace: "Qwen"
+      authSecret: hf-api-secret
+      modelPuller: nvcr.io/nim/nvidia/llm-nim:1.12
+      pullSecret: ngc-secret
+      modelName: "Qwen3.6-27B-FP8"
+  storage:
+    pvc:
+      create: true
+      size: "50Gi"
+      volumeAccessMode: ReadWriteOnce
+---
+apiVersion: apps.nvidia.com/v1alpha1
 kind: NIMService
 metadata:
   name: qwen3-32b-instruct