From 1a4e73b755ced3f695baae847c6b9fa46edbfdda Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Thu, 7 May 2026 14:59:55 -0500 Subject: [PATCH] use vllm openai --- clusters/k3s-dgx/nim-service/qwen.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 4d02000..ee491cd 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -60,11 +60,9 @@ spec: - "--enable-prefix-caching" - '--speculative-config={"method":"mtp","num_speculative_tokens":2}' image: - repository: nvcr.io/nim/nvidia/llm-nim - tag: "1.12" + repository: vllm/vllm-openai + tag: "v0.19.1-cu130" pullPolicy: IfNotPresent - pullSecrets: - - ngc-secret authSecret: hf-api-secret storage: nimCache: