# NIMCache custom resource `qwen2.5-7b-instruct` in the `nim-service` namespace.
# NOTE(review): the nesting below was reconstructed from a manifest that had
# been collapsed onto a single line; verify it against the NIMCache CRD schema
# of the installed operator version before applying.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: qwen2.5-7b-instruct
  namespace: nim-service
spec:
  source:
    ngc:
      # NOTE(review): `latest` is a mutable tag; consider pinning a specific
      # version so the resource resolves to the same image on every apply.
      modelPuller: nvcr.io/nim/qwen/qwen-2.5-7b-instruct:latest
      # Secrets are referenced by name only; presumably they must already
      # exist in the `nim-service` namespace — confirm.
      pullSecret: ngc-secret
      authSecret: ngc-api-secret
      model:
        engine: "vllm"
        # Quoted on purpose: the value is kept as a string, not an integer.
        tensorParallelism: "1"
  storage:
    pvc:
      create: true
      size: "100Gi"
      volumeAccessMode: ReadWriteOnce
---