add HF cache

This commit is contained in:
2026-05-07 14:45:23 -05:00
parent 138ebc8f61
commit 5954bb5202

View File

@@ -21,6 +21,26 @@ spec:
volumeAccessMode: ReadWriteOnce
---
# NIMCache for Qwen3.6-27B-FP8, sourced from the Hugging Face endpoint below
# and stored on a PVC that this resource asks to have created.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: qwen36-27b-fp8
  namespace: nim-service
spec:
  source:
    hf:
      endpoint: "https://huggingface.co"
      # Hugging Face-side namespace, not the Kubernetes namespace above.
      # Presumably combined with modelName to form the repo id
      # (Qwen/Qwen3.6-27B-FP8) -- TODO confirm against the CRD docs.
      namespace: "Qwen"
      # NOTE(review): presumably a Secret holding the Hugging Face token;
      # confirm it exists in the nim-service namespace.
      authSecret: hf-api-secret
      # NOTE(review): presumably the image that performs the download, with
      # pullSecret as its registry credential -- confirm.
      modelPuller: nvcr.io/nim/nvidia/llm-nim:1.12
      pullSecret: ngc-secret
      modelName: "Qwen3.6-27B-FP8"
  storage:
    pvc:
      create: true
      # NOTE(review): verify 50Gi leaves enough headroom for this model.
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
---
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMService
metadata:
name: qwen3-32b-instruct