# Patch summary: in clusters/k3s-dgx/nim-service/qwen.yaml, replace the
# container image nvcr.io/nim/nvidia/llm-nim:"1.12" with
# vllm/vllm-openai:"v0.19.1-cu130" and remove the image.pullSecrets entry
# (ngc-secret). pullPolicy, authSecret and storage are untouched context.
#
# NOTE(review): this patch was recovered from a copy whose newlines and
# indentation had been collapsed. Line boundaries follow the hunk header
# (11 old lines / 9 new lines); the YAML indentation below is inferred from
# key nesting -- confirm it against the target file before applying.
diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
index 4d02000..ee491cd 100644
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -60,11 +60,9 @@ spec:
     - "--enable-prefix-caching"
     - '--speculative-config={"method":"mtp","num_speculative_tokens":2}'
   image:
-    repository: nvcr.io/nim/nvidia/llm-nim
-    tag: "1.12"
+    repository: vllm/vllm-openai
+    tag: "v0.19.1-cu130"
     pullPolicy: IfNotPresent
-    pullSecrets:
-      - ngc-secret
   authSecret: hf-api-secret
   storage:
     nimCache: