enable cache reuse
This commit is contained in:
@@ -45,6 +45,8 @@ spec:
|
||||
value: "true"
|
||||
- name: NIM_MAX_MODEL_LEN
|
||||
value: "32768"
|
||||
- name: NIM_ENABLE_KV_CACHE_REUSE
|
||||
value: "true"
|
||||
image:
|
||||
repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
|
||||
tag: "1.1.0-variant"
|
||||
|
||||
Reference in New Issue
Block a user