This commit is contained in:
2026-05-06 17:33:57 -05:00
parent e90c7eeaa8
commit 98f39b7c68

View File

@@ -0,0 +1,20 @@
# NIMCache "qwen2.5-7b-instruct" in the "nim-service" namespace.
# Declares an NGC source (puller image plus two secret references and a
# model selector) and a PVC-backed storage section.
# NOTE(review): nesting below follows the NIMCache layout
# (spec.source.ngc.model, spec.storage.pvc) — confirm against the CRD
# version installed in the cluster.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: qwen2.5-7b-instruct
  namespace: nim-service
spec:
  source:
    ngc:
      # NOTE(review): ":latest" is a mutable tag; consider pinning a
      # specific release so repeated runs pull the same image.
      modelPuller: nvcr.io/nim/qwen/qwen-2.5-7b-instruct:latest
      # Secrets are referenced by name only; presumably they must already
      # exist in this namespace — verify before applying.
      pullSecret: ngc-secret
      authSecret: ngc-api-secret
      model:
        engine: "vllm"
        # Quoted so the value stays a string rather than an integer.
        tensorParallelism: "1"
  storage:
    pvc:
      # The PVC is requested via create: true with the size and access
      # mode given below.
      create: true
      size: "100Gi"
      volumeAccessMode: ReadWriteOnce
---