From ebcc0cf0452e429185fcf991fd91ddb47215c3c9 Mon Sep 17 00:00:00 2001
From: HaimKortovich
Date: Thu, 7 May 2026 11:26:30 -0500
Subject: [PATCH] enable cache reuse

---
Notes (ignored by `git am`; not part of the commit message):

  * This patch appends one entry to the container env list in
    clusters/k3s-dgx/nim-service/qwen.yaml, directly after
    NIM_MAX_MODEL_LEN: NIM_ENABLE_KV_CACHE_REUSE with the string value "true".
    The value is quoted so that YAML keeps it a string rather than a boolean.
  * NOTE(review): NVIDIA's NIM configuration docs describe enabling this
    feature by setting the variable to `1`. Confirm that the
    qwen3-32b-dgx-spark 1.1.0-variant image also accepts "true".
  * NOTE(review): the leading whitespace of the YAML lines in the hunk below
    was reconstructed (env items at 4 spaces, `image:` at 2 spaces under
    `spec:`). Verify it against the target file before applying, or apply
    with `git apply --ignore-space-change`.

 clusters/k3s-dgx/nim-service/qwen.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
index 7b3344f..176b1c0 100644
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -45,6 +45,8 @@ spec:
       value: "true"
     - name: NIM_MAX_MODEL_LEN
       value: "32768"
+    - name: NIM_ENABLE_KV_CACHE_REUSE
+      value: "true"
   image:
     repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
     tag: "1.1.0-variant"